def setUp(self):
    try:
        self.graph = Dataset(store=self.store)
    except ImportError:
        raise SkipTest("Dependencies for store '%s' not available!" % self.store)
    if self.store == "SQLite":
        _, self.tmppath = mkstemp(prefix='test', dir='/tmp', suffix='.sqlite')
    else:
        self.tmppath = mkdtemp()
    self.graph.open(self.tmppath, create=True)
    self.michel = URIRef(u'michel')
    self.tarek = URIRef(u'tarek')
    self.bob = URIRef(u'bob')
    self.likes = URIRef(u'likes')
    self.hates = URIRef(u'hates')
    self.pizza = URIRef(u'pizza')
    self.cheese = URIRef(u'cheese')
    self.c1 = URIRef(u'context-1')
    self.c2 = URIRef(u'context-2')
    # delete the graph for each test!
    self.graph.remove((None, None, None))
def get_fragment(request, subject, predicate, obj, page, graph):
    fragment = Dataset()
    tpf_url = urlparse(request.build_absolute_uri())
    tpf_url = TPF_URL.format(tpf_url.scheme, tpf_url.netloc, graph)
    licenses = []
    neo_licenses = LicenseModel.nodes.filter(graph__exact=graph)
    if subject and subject.startswith(LICENSE_SUBJECT_PREFIX):
        license_id = subject.split('/')[-1]
        neo_licenses.filter(hashed_sets__exact=license_id)
    for neo_license in neo_licenses:
        license_object = ObjectFactory.objectLicense(neo_license)
        license_object = license_object.to_json()
        license_object['compatible_licenses'] = []
        for compatible_neo_license in neo_license.followings.all():
            compatible_license = ObjectFactory.objectLicense(compatible_neo_license)
            license_object['compatible_licenses'].append(compatible_license.hash())
        licenses.append(license_object)
    rdf_licenses = get_rdf(licenses, graph).triples((subject, predicate, obj))
    total_nb_triples = 0
    for s, p, o in rdf_licenses:
        fragment.add((s, p, o))
        total_nb_triples += 1
    last_result = True
    nb_triple_per_page = total_nb_triples
    _frament_fill_meta(subject, predicate, obj, page, graph, fragment,
                       last_result, total_nb_triples, nb_triple_per_page,
                       request, tpf_url)
    return fragment
def __init__(self, address=config.BRAIN_URL_LOCAL):
    """
    Interact with Triple store

    Parameters
    ----------
    address: str
        IP address and port of the Triple store
    """
    self.address = address
    self.namespaces = {}
    self.ontology_paths = {}
    self.format = 'trig'
    self.dataset = Dataset()
    self.query_prefixes = read_query('prefixes')

    self._define_namespaces()
    self._get_ontology_path()
    self._bind_namespaces()

    self.my_uri = None

    self._log = logger.getChild(self.__class__.__name__)
    self._log.debug("Booted")

    self._brain_log = config.BRAIN_LOG_ROOT.format(
        datetime.now().strftime('%Y-%m-%d-%H-%M'))

    # Launch first query
    self.count_statements()
def open(self):
    # XXX: If we have a source that's read only, should we need to set the
    # store separately??
    g0 = Dataset('SPARQLUpdateStore', default_union=True)
    g0.open(tuple(self.conf['rdf.store_conf']))
    self.graph = g0
    return self.graph
def set_member(self, c_id, m_obj):
    if isinstance(m_obj, Model):
        m_obj = [m_obj]
    elif not isinstance(m_obj, list):
        raise ParseError()
    c_ldp_id = self.marmotta.ldp(encoder.encode(c_id))
    collection = self.get_collection(c_id).pop()  # 404 if collection not found
    if len(set([m.id for m in m_obj])) != len(m_obj):
        raise ForbiddenError()
    if not collection.capabilities.membershipIsMutable:
        raise ForbiddenError()
    if collection.capabilities.restrictedToType:
        for m in m_obj:
            if not (hasattr(m, "datatype") and
                    m.datatype in collection.capabilities.restrictedToType):
                raise ForbiddenError()
    if collection.capabilities.maxLength >= 0:
        size = self.sparql.size(c_ldp_id).bindings.pop().get(Variable('size'))
        if int(size) > collection.capabilities.maxLength - len(m_obj):
            # "Operation forbidden. Collection of maximum size {} is full."
            raise ForbiddenError()
    ds = Dataset()
    ldp = ds.graph(identifier=LDP.ns)
    for m in m_obj:
        m_id = self.marmotta.ldp(encoder.encode(c_id) + "/member/" + encoder.encode(m.id))
        member = ds.graph(identifier=m_id)
        member += self.RDA.object_to_graph(member.identifier, m)
        ldp += LDP.add_contains(c_ldp_id + "/member", m_id, False)
    res = self.sparql.insert(ds)
    if res.status_code != 200:
        raise DBError()
    return m_obj
def setUp(self):
    try:
        self.graph = Dataset(store=self.store)
    except ImportError:
        raise SkipTest("Dependencies for store '%s' not available!" % self.store)
    if self.store == "SQLite":
        _, self.tmppath = mkstemp(prefix="test", dir="/tmp", suffix=".sqlite")
    elif self.store == "SPARQLUpdateStore":
        root = HOST + DB
        self.graph.open((root + "sparql", root + "update"))
    else:
        self.tmppath = mkdtemp()

    if self.store != "SPARQLUpdateStore":
        self.graph.open(self.tmppath, create=True)

    self.michel = URIRef("urn:michel")
    self.tarek = URIRef("urn:tarek")
    self.bob = URIRef("urn:bob")
    self.likes = URIRef("urn:likes")
    self.hates = URIRef("urn:hates")
    self.pizza = URIRef("urn:pizza")
    self.cheese = URIRef("urn:cheese")
    # Use regular URIs because SPARQL endpoints like Fuseki alter short names
    self.c1 = URIRef("urn:context-1")
    self.c2 = URIRef("urn:context-2")

    # delete the graph for each test!
    self.graph.remove((None, None, None))
    for c in self.graph.contexts():
        c.remove((None, None, None))
        assert len(c) == 0
        self.graph.remove_graph(c)
def open_db(path=DEFAULT_DATABASE_PATH):
    my_graph = Dataset('Sleepycat')
    store_state = my_graph.open(path, create=False)
    assert store_state != NO_STORE, 'Store does not exist'
    assert store_state == VALID_STORE, 'The underlying store is corrupt'
    return my_graph
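
# A minimal usage sketch for open_db above (an assumption for illustration: a
# Sleepycat/BerkeleyDB store already exists at DEFAULT_DATABASE_PATH and the
# bsddb3 bindings are installed; otherwise open() reports NO_STORE).
if __name__ == '__main__':
    db = open_db()
    try:
        named_graphs = sum(1 for _ in db.contexts())
        print('store reports %d triples in %d graphs' % (len(db), named_graphs))
    finally:
        db.close()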
def open(self):
    import logging
    # XXX: If we have a source that's read only, should we need to set the
    # store separately??
    g0 = Dataset('Sleepycat', default_union=True)
    self.conf['rdf.store'] = 'Sleepycat'
    g0.open(self.conf['rdf.store_conf'], create=True)
    self.graph = g0
    logging.debug("Opened SleepyCatSource")
def dump_as_rdf(g: Dataset, table_name: str) -> bool:
    """
    Dump the contents of Graph g in RDF turtle
    :param g: Dataset to dump
    :param table_name: name of the base table
    :return: success indicator
    """

    # Propagate the mapped concepts up the tree
    def add_to_ancestors(s: URIRef, vm: URIRef):
        g.add((s, ISO['enumeratedConceptualDomain.hasMember'], vm))
        for parent in g.objects(s, SKOS.broader):
            add_to_ancestors(parent, vm)

    if COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
        for subj, obj in g.subject_objects(SKOS.exactMatch):
            add_to_ancestors(subj, obj)

        # TODO: this gives us a list of all concepts in the scheme... useful?
        for scheme, tc in g.subject_objects(SKOS.hasTopConcept):
            for member in g.objects(tc, ISO['enumeratedConceptualDomain.hasMember']):
                g.add((scheme, ISO['enumeratedConceptualDomain.hasMember'], member))

    for name, ns in namespaces.items():
        g.bind(name.lower(), ns)

    outfile = os.path.join(DATA_DIR, table_name + '.ttl')
    print(f"Saving output to {outfile}")
    g.serialize(outfile, format='turtle')
    print(f"{len(g)} triples written")
    return True
def test_load_from_file(self):
    ds = Dataset()
    ds.parse("geoStatements.trig", format="trig")

    async def f():
        await self.aiotest.addN(
            (i for i in ds.quads((None, None, None, None))))
        print("ds loaded")

    self.loop.run_until_complete(asyncio.gather(f()))
def set_service(self, s_obj):
    ds = Dataset()
    service = ds.graph(identifier=self.marmotta.ldp("service"))
    service += self.RDA.object_to_graph(service.identifier, s_obj)
    ldp = ds.graph(identifier=LDP.ns)
    ldp += LDP.add_contains(self.marmotta.ldp(), service.identifier, False)
    response = self.sparql.insert(ds)
    if response.status_code == 200:
        return s_obj
    else:
        raise DBError()
def load_statements():
    a = datetime.datetime.now()
    logger.info(f"start loading ds at: {a}")
    ds = Dataset()
    ds.parse(STATEMENTS, format=TYPE)
    b = datetime.datetime.now()
    logger.info(f"finished loading ds at: {b}")
    logger.info(f"ds loaded: {ds}")
    logger.info(f"ds loaded in {b - a}")
    return ds
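
# Hedged follow-up sketch: once load_statements() has parsed the TriG file, the
# named graphs can be inspected like this (logger and the helper above are assumed
# to live in the same module; log_graph_sizes is a made-up name for illustration).
def log_graph_sizes():
    ds = load_statements()
    for g in ds.graphs():
        logger.info(f"graph {g.identifier} holds {len(g)} triples")
    return ds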
def __init__(self, namespace, showObjs=True, showClasses=False, showNamespace=True):
    self.ds = Dataset()
    self.d = UmlPygraphVizDiagram()
    self.show_objs = showObjs
    self.show_classes = showClasses
    self.namespace = namespace
    self.show_namespaces = showNamespace
    self.add_namespaces(self.namespace)
def _get_single_graph_from_trig(trig_file: Optional[str] = None,
                                data: Optional[str] = None) -> rdflib.Graph:
    if trig_file is None and data is None:
        raise RuntimeError("One of trig_file OR data *must* be specified.")
    dataset = Dataset()
    dataset.parse(format="trig", source=trig_file, data=data)
    graphs_with_triples = [g for g in dataset.graphs() if len(g) > 0]
    assert (
        len(graphs_with_triples) == 1
    ), f"Found {len(graphs_with_triples)} non-trivial graphs in {trig_file}. Expected one."
    return graphs_with_triples[0]
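
# Hedged usage example for _get_single_graph_from_trig; the TriG payload and the
# helper name below are made up for illustration. Only one named graph carries
# triples, so the function returns exactly that graph.
_EXAMPLE_TRIG = """
@prefix ex: <http://example.com/> .
ex:g1 { ex:s ex:p ex:o . }
"""

def _example_single_graph() -> rdflib.Graph:
    g = _get_single_graph_from_trig(data=_EXAMPLE_TRIG)
    assert len(g) == 1
    return g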
def __init__(self, config):
    """Initialize the graph store and a layout.

    NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
    for Graph Store HTTP protocol
    (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
    this only in the (currently unreleased) 2.2 branch. It works with Jena,
    which is currently the reference implementation.
    """
    self.config = config
    self.store = plugin.get('Lmdb', Store)(config['location'])
    self.ds = Dataset(self.store, default_union=True)
    self.ds.namespace_manager = nsm
def __init__(self, identifier, columns, schema, metadata_graph, encoding, output_format):
    self.ds = Dataset()
    # self.ds = apply_default_namespaces(Dataset())
    self.g = self.ds.graph(URIRef(identifier))

    self.columns = columns
    self.schema = schema
    self.metadata_graph = metadata_graph
    self.encoding = encoding
    self.output_format = output_format

    self.templates = {}

    self.aboutURLSchema = self.schema.csvw_aboutUrl
def __init__(self):
    # type: () -> RdfBuilder

    self.ontology_paths = {}
    self.namespaces = {}
    self.dataset = Dataset()

    self._log = logger.getChild(self.__class__.__name__)
    self._log.debug("Booted")

    self._define_namespaces()
    self._bind_namespaces()
    self.define_named_graphs()
    self.load_ontology_integration()
def fetch(endpoint, timeout=0):
    store = SPARQLStore(endpoint)
    ds = Dataset(store)
    for rs_name, rs_uri in get_rule_sets(endpoint + rs_table_page):
        # TODO: maybe do not discard but try to merge? no.
        if rs_uri not in rule_sets:
            # TODO: handle possible query error?
            gr = ds.get_context(rs_uri)
            try:
                rs_triples = gr.query(q)
                yield rs_name, rs_uri, rs_triples
                time.sleep(timeout)
            except Exception:
                print('error with', rs_uri)
                other_rs.append(rs_uri)
def proc_table_access_table(opts: argparse.Namespace) -> int:
    """
    Iterate over the table_access table emitting its entries
    :param opts: function arguments
    :return: Graph
    """
    logging.info("Iterating over table_access table")
    process_parsed_args(opts, FileAwareParser.error)
    queries = QueryTexts(I2B2Tables(opts))
    q = queries.ont_session.query(queries.tables.table_access)
    e: TableAccess
    for e in q.all():
        print(f"{e.c_table_cd}", end='')
        if not e.c_table_cd.startswith(TABLE_PREFIX) or e.c_table_cd in SKIP_TABLES:
            print(" skipped")
            continue
        g = Dataset()
        nelements = proc_table_access_row(queries, e, g)
        if nelements:
            print(f" {nelements} elements processed")
            dump_as_rdf(g, e.c_table_cd)
        if ONE_TABLE:
            break
    else:
        nelements = 0
    return nelements
class DefaultSource(RDFSource):
    """ Reads from and queries against a configured database.

        The default configuration.

        The database store is configured with::

            "rdf.source" = "default"
            "rdf.store" = <your rdflib store name here>
            "rdf.store_conf" = <your rdflib store configuration here>

        Leaving unconfigured simply gives an in-memory data store.
    """

    def open(self):
        self.graph = Dataset(self.conf['rdf.store'], default_union=True)
        self.graph.open(self.conf['rdf.store_conf'], create=True)
def rdf(self):
    try:
        return self.conf['rdf.graph']
    except KeyError:
        if ALLOW_UNCONNECTED_DATA_USERS:
            return Dataset(default_union=True)
        raise DataUserUnconnected('No rdf.graph')
def test_simple(self):
    a = datetime.datetime.now()
    seed = [(URIRef(f"urn:example.com/mock/id{i}"),
             URIRef(f"urn:example.com/mock/rel{i}"),
             Literal(f"mock-val{i}"),
             URIRef(f"urn:example.com/mock/context{j}"))
            for i in range(100) for j in range(100)]

    async def seed_store():
        await self.aiotest.addN(seed)

    g, cg, ds = Graph(), ConjunctiveGraph(), Dataset(default_union=True)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.gather(seed_store()))
    b = datetime.datetime.now()
    print("seed time ->", b - a)

    async def f():
        for i in (g, cg, ds):
            await async_fill_graph(i, self.aiotest.statements())

    loop.run_until_complete(asyncio.gather(f()))
    for i in (g, cg, ds):
        print(len(i))
    # print("g", [i for i in g])
    # print("cg", [i for i in cg])
    # print("ds", [(i, g.identifier) for i in g for g in ds.graphs()])
    c = datetime.datetime.now()
    print("graph time ->", c - b)
    print("complete time ->", c - a)
def validateRDF(file):
    center, line = 66, 70
    print(
        F"\n{'':>16}{'-' * line:^{center}}\n{'|':>16}\t{F'VALIDATING RDF FILE {fileSize(file)}':^{center}}|\n{'':>16}{'-' * line:^{center}}\n"
    )
    start = time()
    from pathlib import Path
    size = Path(file).stat().st_size

    try:
        print("\n\t1. Checking the RDF file.")
        start = time()
        Dataset().parse(file, format="trig")
        print(
            F"\n\t\t>>> ✅ The converted file \n\t\t[{file}] \n\t\tis in a valid RDF format! "
            F"\n\n\t\t>>> We therefore can highly ascertain that the original file "
            F"\n\t\t[{file}]\n\t\tis in a valid RDF format.")
        print(
            "" if start is None else
            F"""\n\t2. {'Parsing time':.<50} {str(timedelta(seconds=time() - start))}"""
        )
    except Exception as err:
        print("\t\t\t>>> ❌ Invalid RDF")
        print(F"\t\t\t>>> [DETAIL ERROR FROM validate_RDF] {err}")
    finally:
        print(
            F"\n\t{'2. Done in':.<53} {str(timedelta(seconds=time() - start))}"
        )
def __init__(self, showObjs, showClasses, namespace):
    self.ds = Dataset()
    self.d = UmlPygraphVizDiagram()
    self.show_objs = showObjs
    self.show_classes = showClasses
    self.namespace = namespace
    self.add_namespaces(self.namespace)
def setUp(self):
    try:
        self.graph = Dataset(store=self.store)
    except ImportError:
        raise SkipTest(
            "Dependencies for store '%s' not available!" % self.store)
    if self.store == "SQLite":
        _, self.tmppath = mkstemp(
            prefix='test', dir='/tmp', suffix='.sqlite')
    elif self.store == "SPARQLUpdateStore":
        root = HOST + DB
        self.graph.open((root + "sparql", root + "update"))
    else:
        self.tmppath = mkdtemp()

    if self.store != "SPARQLUpdateStore":
        self.graph.open(self.tmppath, create=True)

    self.michel = URIRef(u'urn:michel')
    self.tarek = URIRef(u'urn:tarek')
    self.bob = URIRef(u'urn:bob')
    self.likes = URIRef(u'urn:likes')
    self.hates = URIRef(u'urn:hates')
    self.pizza = URIRef(u'urn:pizza')
    self.cheese = URIRef(u'urn:cheese')
    # Use regular URIs because SPARQL endpoints like Fuseki alter short names
    self.c1 = URIRef(u'urn:context-1')
    self.c2 = URIRef(u'urn:context-2')

    # delete the graph for each test!
    self.graph.remove((None, None, None))
    for c in self.graph.contexts():
        c.remove((None, None, None))
        assert len(c) == 0
        self.graph.remove_graph(c)
def main(csvfile, linkPredicate, destination): g = buildLinkset(csvfile=csvfile, linkPredicate=linkPredicate, identifier=create.term('id/linkset/rijksmuseum/')) dsG = rdflib.Dataset() dsG.add_graph(g) DATE = Literal(datetime.datetime.now().strftime('%Y-%m-%d'), datatype=XSD.datetime) rdfSubject.db = dsG ds = Linkset( create.term('id/linkset/rijksmuseum/'), name=[Literal("Rijksmuseum person linkset", lang='en')], description=[ Literal( "Dataset that links Rijksmuseum persons to Wikidata and Ecartico. Data harvested from Europeana and Ecartico.", lang='en') ], dateModified=DATE, dcdate=DATE, dcmodified=DATE, target=[ create.term('id/rijksmuseum/'), create.term('id/ecartico/'), URIRef("https://wikidata.org/") ], linkPredicate=[linkPredicate]) linksetDs = Dataset( create.term('id/linkset/'), name=[Literal("Linkset collection", lang='en')], description=["Collection of linksets stored in this triplestore."]) linksetDs.subset = [ds] linksetDs.hasPart = [ds] ds.isPartOf = linksetDs ds.inDataset = linksetDs dsG.bind('void', void) dsG.bind('dcterms', dcterms) dsG.bind('schema', schema) dsG.serialize(destination=destination, format='trig')
def __init__(self, address=config.BRAIN_URL_LOCAL): """ Interact with Triple store Parameters ---------- address: str IP address and port of the Triple store """ self.address = address self.namespaces = {} self.ontology_paths = {} self.format = 'trig' self.dataset = Dataset() self.query_prefixes = """ prefix gaf: <http://groundedannotationframework.org/gaf#> prefix grasp: <http://groundedannotationframework.org/grasp#> prefix leolaniInputs: <http://cltl.nl/leolani/inputs/> prefix leolaniFriends: <http://cltl.nl/leolani/friends/> prefix leolaniTalk: <http://cltl.nl/leolani/talk/> prefix leolaniTime: <http://cltl.nl/leolani/time/> prefix leolaniWorld: <http://cltl.nl/leolani/world/> prefix n2mu: <http://cltl.nl/leolani/n2mu/> prefix ns1: <urn:x-rdflib:> prefix owl: <http://www.w3.org/2002/07/owl#> prefix prov: <http://www.w3.org/ns/prov#> prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix sem: <http://semanticweb.cs.vu.nl/2009/11/sem/> prefix skos: <http://www.w3.org/2004/02/skos/core#> prefix time: <http://www.w3.org/TR/owl-time/#> prefix xml: <http://www.w3.org/XML/1998/namespace> prefix xml1: <https://www.w3.org/TR/xmlschema-2/#> prefix xsd: <http://www.w3.org/2001/XMLSchema#> """ self._define_namespaces() self._get_ontology_path() self._bind_namespaces() self.my_uri = None self._log = logger.getChild(self.__class__.__name__) self._log.debug("Booted")
def dataset(self):
    #pdb.set_trace()
    if hasattr(self._connection, 'dataset'):
        return getattr(self._connection, 'dataset')
    if self.store == 'Sleepycat':
        dataset = Dataset(store=self.store, default_union=True)
        dataset.open(self.store_path, create=True)
    else:
        self.store = Virtuoso(self.connection)
        #dataset = Dataset(store=self.store, default_union=True)
        dataset = ConjunctiveGraph(store=self.store, identifier=CENDARI)
        self.store.connection  # force connection
    setattr(self._connection, 'dataset', dataset)
    nm = NamespaceManager(dataset)
    for (prefix, ns) in INIT_NS.iteritems():
        nm.bind(prefix, ns)
    dataset.namespace_manager = nm
    return dataset
def get_ds0():
    update_endpoint = 'http://localhost:8890/sparql-auth'
    # query_endpoint = 'http://localhost:8890/sparql'
    store = SPARQLUpdateStore(update_endpoint, update_endpoint, autocommit=True)
    store.setHTTPAuth(DIGEST)
    store.setCredentials(user='******', passwd='admin')
    return Dataset(store)
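
# Hedged usage sketch for get_ds0 (assumes the Virtuoso endpoint and credentials
# above are reachable; the graph and triple names are made up). With autocommit=True
# each add() is pushed to the endpoint as a SPARQL UPDATE.
from rdflib import Literal, URIRef

def add_example_quad():
    ds = get_ds0()
    g = ds.graph(URIRef('urn:example:graph'))
    g.add((URIRef('urn:example:s'), URIRef('urn:example:p'), Literal('o')))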
def test_ldp_access_with_ldp(self):
    with app.app_context():
        # todo: post collection to sparql, retrieve via LDP and compare
        c_obj = self.mock.collection()
        self.db.set_collection(c_obj)
        g = Dataset().parse(self.db.marmotta.ldp(encoder.encode(c_obj.id)),
                            format="n3")
        r_obj = self.db.RDA.graph_to_object(g).pop()
        self.assertDictEqual(c_obj.dict(), r_obj.dict())
def set_collection(self, c_obj, over_write=False):
    if isinstance(c_obj, Model):
        c_obj = [c_obj]
    elif not isinstance(c_obj, list):
        raise ParseError()
    # create LD collection and declare as ldp:BasicContainer
    ds = Dataset()
    ldp = ds.graph(identifier=LDP.ns)
    for c in c_obj:
        c_id = encoder.encode(c.id)
        collection = ds.graph(identifier=self.marmotta.ldp(c_id))
        collection += self.RDA.object_to_graph(collection.identifier, c)
        ldp += LDP.add_contains(self.marmotta.ldp(), collection.identifier)
        member = ds.graph(identifier=self.marmotta.ldp(c_id + '/member'))
        ldp += LDP.add_contains(collection.identifier, member.identifier)
    ins = self.sparql.insert(ds)
    if ins.status_code == 200:
        return c_obj
    else:
        raise DBError()
def createNanopubs(g): ds = Dataset() ds.namespace_manager.bind("ddi","http://purl.org/net/nlprepository/spl-ddi-annotation-poc#") ds.namespace_manager.bind("prov","http://www.w3.org/ns/prov#") ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#") bindings = g.query(interactSelect) for b in bindings: npURI = URIRef(b['inter'] + "-nanopub") headURI = URIRef(b['inter'] + "-head") aURI = URIRef(b['inter'] + "-assertion") pubInfoURI = URIRef(b['inter'] + "-pubInfo") provURI = URIRef(b['inter'] + "-provenance") head = ds.add_graph(headURI) head.add((npURI, RDF.type, np['Nanopublication'])) head.add((aURI, RDF.type, np['Assertion'])) head.add((provURI, RDF.type, np['Provenance'])) head.add((pubInfoURI, RDF.type, np['PublicationInfo'])) head.add((npURI, np['hasAssertion'], aURI)) head.add((npURI, np['hasProvenance'], provURI)) head.add((npURI, np['hasPublicationInfo'], pubInfoURI)) #print head.serialize() a = ds.add_graph(aURI) a.add((b['s'], URIRef('http://dbmi-icode-01.dbmi.pitt.edu/dikb/vocab/interactsWith'), b['o'])) a.add((b['s'], RDF.type, sio["SIO_010038"])) a.add((b['o'], RDF.type, sio["SIO_010038"])) prov = ds.add_graph(provURI) prov.add((aURI, w3prov['wasDerivedFrom'], b['inter'])) print ds.serialize(format='trig')
def __init__(self, graph_identifier, dataset, variables, headers): self._headers = headers self._variables = variables # TODO: Family is now superseded by a full dataset description in the form of QBer # if 'family' in config: # self._family = config['family'] # try: # family_def = getattr(mappings, config['family']) # self._nocode = family_def['nocode'] # self._integer = family_def['integer'] # self._mappings = family_def['mappings'] # except: # logger.warning('No family definition found') # self._nocode = [] # self._integer = [] # self._mappings = {} # else: # self._family = None # TODO: number_observations is now superseded by a full dataset description in the form of QBer # if 'number_observations' in config: # self._number_observations = config['number_observations'] # else: # self._number_observations = None # TODO: stop is now superseded by a full dataset description in the form of QBer # self._stop = config['stop'] # TODO: Now setting these as simple defaults self._family = None self._number_observations = True self._stop = None # TODO: Think of what to do here... if self._family is None: self._VOCAB_URI_PATTERN = "{0}{{}}/{{}}".format(self._VOCAB_BASE) self._RESOURCE_URI_PATTERN = "{0}{{}}/{{}}".format( self._RESOURCE_BASE) else: self._VOCAB_URI_PATTERN = "{0}{1}/{{}}/{{}}".format( self._VOCAB_BASE, self._family) self._RESOURCE_URI_PATTERN = "{0}{1}/{{}}/{{}}".format( self._RESOURCE_BASE, self._family) self.ds = apply_default_namespaces(Dataset()) self.g = self.ds.graph(URIRef(graph_identifier)) self._dataset_name = dataset['name'] self._dataset_uri = URIRef(dataset['uri'])
def test_hext_json_representation(): """Tests to see if every link in the ND-JSON Hextuple result is, in fact, JSON""" d = Dataset() trig_data = """ PREFIX ex: <http://example.com/> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> ex:g1 { ex:s1 ex:p1 ex:o1 , ex:o2 ; ex:p2 [ a owl:Thing ; rdf:value "thingy" ; ] ; ex:p3 "Object 3" , "Object 4 - English"@en ; ex:p4 "2021-12-03"^^xsd:date ; ex:p5 42 ; ex:p6 "42" ; . } ex:g2 { ex:s1 ex:p1 ex:o1 , ex:o2 ; . ex:s11 ex:p11 ex:o11 , ex:o12 . } # default graph triples ex:s1 ex:p1 ex:o1 , ex:o2 . ex:s21 ex:p21 ex:o21 , ex:o22 . """ d.parse(data=trig_data, format="trig") out = d.serialize(format="hext") for line in out.splitlines(): j = json.loads(line) assert isinstance(j, list)
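
# Optional round-trip sketch (an assumption worth checking against your rdflib
# version): recent rdflib releases also ship a Hextuples parser, so the serialized
# ND-JSON can be read back and the quads should survive the round trip.
def _hext_roundtrip(original: Dataset) -> Dataset:
    restored = Dataset()
    restored.parse(data=original.serialize(format="hext"), format="hext")
    assert len(list(restored.quads((None, None, None, None)))) == len(
        list(original.quads((None, None, None, None))))
    return restored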
def createNanopubs(g): ds = Dataset() ds.namespace_manager.bind("ddi","http://dbmi-icode-01.dbmi.pitt.edu/mp/") ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#") ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#") ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#") ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#") ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/") ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#") ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#") ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/") ds.namespace_manager.bind("mp", "http://purl.org/mp/") assertionCount = 1 enzymeCount = 1 pddiD = dict([line.split(',',1) for line in open('../../data/np-graphs/processed-dikb-ddis-for-nanopub.csv')]) cL = dict([line.split('\t') for line in open('../../data/chebi_mapping.txt')]) pL = dict([line.split('\t') for line in open('../../data/pro_mapping.txt')]) substrateD = {} inhibitorD = {} bindings = g.query(interactSelect) for b in bindings: if( pddiD.has_key(str(b['c'].decode('utf-8'))) ): tempClaim = pddiD[ str(b['c'].decode('utf-8')) ] claimInfo = tempClaim.split(',') claimSub = claimInfo[1] claimObj = claimInfo[2] predicateType = claimInfo[0].strip('\n') if(predicateType == "increases_auc"): aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s") % assertionCount assertionCount += 1 bn1 = BNode('1') bn2 = BNode('2') bn3 = BNode('3') bn4 = BNode('4') bn5 = BNode('5') bn6 = BNode('6') bn7 = BNode('7') bn8 = BNode('8') bn9 = BNode('9') bn10 = BNode('10') assertionLabel = cL[claimSub.strip('\n')].strip('\n') + " - " + cL[claimObj.strip('\n')].strip('\n') + " potential drug-drug interaction" a = ds.add_graph((aURI)) a.add(( aURI, RDF.type, np.assertion)) a.add(( aURI, RDF.type, owl.Class)) a.add(( aURI, RDFS.label, (Literal(assertionLabel.lower())))) a.add(( aURI, RDFS.subClassOf, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000000"))) a.add(( bn1, RDF.type, owl.Restriction)) a.add(( bn1, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136"))) a.add(( bn2, RDF.type, owl.Class)) a.add(( bn3, RDF.first, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000012"))) a.add(( bn5, RDF.first, bn4)) a.add(( bn3, RDF.rest, bn5)) a.add(( bn4, RDF.type, owl.Restriction)) a.add(( bn4, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052"))) a.add(( bn4, owl.hasValue, URIRef(claimSub.strip('\n')))) a.add(( bn5, RDF.rest, RDF.nil)) a.add(( bn2, owl.intersectionOf, bn3)) a.add(( bn1, owl.someValuesFrom, bn2)) a.add(( aURI, RDFS.subClassOf, bn1)) a.add(( bn6, RDF.type, owl.Restriction)) a.add(( bn6, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136"))) a.add(( bn7, RDF.type, owl.Class)) a.add(( bn8, RDF.first, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000013"))) a.add(( bn10, RDF.first, bn9)) a.add(( bn8, RDF.rest, bn10)) a.add(( bn9, RDF.type, owl.Restriction)) a.add(( bn9, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052"))) a.add(( bn9, owl.hasValue, URIRef(claimObj.strip('\n')))) a.add(( bn10, RDF.rest, RDF.nil)) a.add(( bn7, owl.intersectionOf, bn8)) a.add(( bn6, owl.someValuesFrom, bn7)) a.add(( aURI, RDFS.subClassOf, bn6)) ds.add(( aURI, mp.formalizes, b['c'])) ds.add(( b['c'], mp.formalizedAs, aURI)) elif(predicateType == "substrate_of"): aURI = 
URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s") % assertionCount assertionCount += 1 dLabel = cL[claimSub.strip('\n')].strip('\n') eLabel = pL[claimObj.strip('\n')].strip('\n') assertionLabel = dLabel + " substrate of " + eLabel a = ds.add_graph((aURI)) ds.add(( aURI, RDF.type, np.assertion)) ds.add(( aURI, RDFS.label, Literal(assertionLabel.lower()))) ds.add(( aURI, mp.formalizes, b['c'])) ds.add(( b['c'], mp.formalizedAs, aURI)) a.add(( URIRef(claimObj.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/OBI_0000427"))) a.add(( URIRef(claimObj.strip('\n')), RDFS.label, Literal(eLabel.lower()))) a.add(( URIRef(claimObj.strip('\n')), URIRef("http://purl.obolibrary.org/obo/DIDEO_00000096"), URIRef(claimSub.strip('\n')))) a.add(( URIRef(claimSub.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431"))) a.add(( URIRef(claimSub.strip('\n')), RDFS.label, Literal(dLabel.lower()))) elif(predicateType == "inhibits"): aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s") % assertionCount assertionCount += 1 dLabel = cL[claimSub.strip('\n')].strip('\n') eLabel = pL[claimObj.strip('\n')].strip('\n') assertionLabel = dLabel + " inhibits " + eLabel a = ds.add_graph((aURI)) ds.add(( aURI, RDF.type, np.assertion)) ds.add(( aURI, RDFS.label, Literal(assertionLabel.lower()))) ds.add(( aURI, mp.formalizes, b['c'])) ds.add(( b['c'], mp.formalizedAs, aURI)) a.add(( URIRef(claimSub.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431"))) a.add(( URIRef(claimSub.strip('\n')), RDFS.label, Literal(dLabel.lower()))) a.add(( URIRef(claimSub.strip('\n')), URIRef("http://purl.obolibrary.org/obo/RO_0002449"), URIRef(claimObj.strip('\n')))) print ds.serialize(format='trig')
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD
from rdflib.namespace import FOAF
from rdflib.serializer import Serializer
import rdflib.resource
import uuid

# Create a default dataset graph.
ds = Dataset(default_union=True)

# JSON-LD serializer requires an explicit context.
# https://github.com/RDFLib/rdflib-jsonld
# context = {"@vocab": "http://purl.org/dc/terms/", "@language": "en"}
context = {"prov": "http://www.w3.org/ns/prov#",
           "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
           "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
           "xsd": "http://www.w3.org/2001/XMLSchema#",
           "dc": "http://purl.org/dc/terms"}

# Define some namespaces
PROV = Namespace("http://www.w3.org/ns/prov#")
ORE = Namespace("http://www.openarchives.org/ore/terms/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
DC = Namespace("http://purl.org/dc/terms/")
UUIDNS = Namespace("urn:uuid:")
DOCKER = Namespace("http://w3id.org/daspos/docker#")
# W3C namespace:
POSIX = Namespace("http://www.w3.org/ns/posix/stat#")
ACL = Namespace("http://www.w3.org/ns/auth/acl#")

# DASPOS namespaces
class DatasetTestCase(unittest.TestCase): store = 'default' slow = True tmppath = None def setUp(self): try: self.graph = Dataset(store=self.store) except ImportError: raise SkipTest( "Dependencies for store '%s' not available!" % self.store) if self.store == "SQLite": _, self.tmppath = mkstemp( prefix='test', dir='/tmp', suffix='.sqlite') else: self.tmppath = mkdtemp() self.graph.open(self.tmppath, create=True) self.michel = URIRef(u'michel') self.tarek = URIRef(u'tarek') self.bob = URIRef(u'bob') self.likes = URIRef(u'likes') self.hates = URIRef(u'hates') self.pizza = URIRef(u'pizza') self.cheese = URIRef(u'cheese') self.c1 = URIRef(u'context-1') self.c2 = URIRef(u'context-2') # delete the graph for each test! self.graph.remove((None, None, None)) def tearDown(self): self.graph.close() if os.path.isdir(self.tmppath): shutil.rmtree(self.tmppath) else: os.remove(self.tmppath) def testGraphAware(self): if not self.graph.store.graph_aware: return g = self.graph g1 = g.graph(self.c1) # added graph exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([self.c1, DATASET_DEFAULT_GRAPH_ID])) # added graph is empty self.assertEquals(len(g1), 0) g1.add( (self.tarek, self.likes, self.pizza) ) # added graph still exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([self.c1, DATASET_DEFAULT_GRAPH_ID])) # added graph contains one triple self.assertEquals(len(g1), 1) g1.remove( (self.tarek, self.likes, self.pizza) ) # added graph is empty self.assertEquals(len(g1), 0) # graph still exists, although empty self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([self.c1, DATASET_DEFAULT_GRAPH_ID])) g.remove_graph(self.c1) # graph is gone self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([DATASET_DEFAULT_GRAPH_ID])) def testDefaultGraph(self): self.graph.add(( self.tarek, self.likes, self.pizza)) self.assertEquals(len(self.graph), 1) # only default exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([DATASET_DEFAULT_GRAPH_ID])) # removing default graph removes triples but not actual graph self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID) self.assertEquals(len(self.graph), 0) # default still exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([DATASET_DEFAULT_GRAPH_ID])) def testNotUnion(self): g1 = self.graph.graph(self.c1) g1.add((self.tarek, self.likes, self.pizza)) self.assertEqual(list(self.graph.objects(self.tarek, None)), []) self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
def __init__(self, output_filename='output.png'):
    self.ds = Dataset()
    #self.d = UmlGraphVizDiagram(output_filename)
    self.d = UmlPygraphVizDiagram(output_filename)
class RDFtoUmlDiagram(): """ Transform a RDF dataset to an UML diagram """ def __init__(self, output_filename='output.png'): self.ds = Dataset() #self.d = UmlGraphVizDiagram(output_filename) self.d = UmlPygraphVizDiagram(output_filename) def load_rdf(self, filename, input_format=None): if input_format: rdf_format = input_format elif filename is not sys.stdin: format_list = {'.xml': 'xml', '.rdf': 'xml', '.owl': 'xml', '.n3': 'n3', '.ttl': 'turtle', '.nt': 'nt', '.trig': 'trig', '.nq': 'nquads', '': 'turtle'} extension = splitext(filename.name)[1] rdf_format = format_list[extension] else: rdf_format = 'turtle' temp = self.ds.graph("file://"+filename.name) temp.parse(filename.name, format=rdf_format) def add_namespaces(self, namespaces): if namespaces: for ns in namespaces: self.ds.namespace_manager.bind(ns[0],ns[1]) def start_subgraph(self, graph_name): self.d.start_subgraph(graph_name.strip('[<>]:_')) def close_subgraph(self): self.d.close_subgraph() def add_object_node(self, object_name, classes_name, attributes): self.d.add_object_node(self.ds.namespace_manager.qname(object_name), classes_name, attributes) def add_class_node(self, class_name, attributes): self.d.add_class_node(self.ds.namespace_manager.qname(class_name), attributes) def add_edge(self, src, dst, predicate): self.d.add_edge(self.ds.namespace_manager.qname(src), self.ds.namespace_manager.qname(dst), self.ds.namespace_manager.qname(predicate)) def add_subclass_edge(self, src, dst): self.d.add_subclass_edge(self.ds.namespace_manager.qname(src), self.ds.namespace_manager.qname(dst)) def create_namespace_box(self): # Create Namespace box self.d.add_label("Namespaces:\l") for ns in sorted(self.ds.namespaces()): self.d.add_label("%s:\t%s \l" % (ns[0], ns[1])) def output_dot(self): self.d.write_to_file() def close(self): self.create_namespace_box() self.d.close() def visualize(self): self.d.visualize()
class BurstConverter(object): """The actual converter, that processes the chunk of lines from the CSV file, and uses the instructions from the ``schema`` graph to produce RDF.""" def __init__(self, identifier, columns, schema, metadata_graph, encoding, output_format): self.ds = Dataset() # self.ds = apply_default_namespaces(Dataset()) self.g = self.ds.graph(URIRef(identifier)) self.columns = columns self.schema = schema self.metadata_graph = metadata_graph self.encoding = encoding self.output_format = output_format self.templates = {} self.aboutURLSchema = self.schema.csvw_aboutUrl def equal_to_null(self, nulls, row): """Determines whether a value in a cell matches a 'null' value as specified in the CSVW schema)""" for n in nulls: n = Item(self.metadata_graph, n) col = str(n.csvw_name) val = str(n.csvw_null) if row[col] == val: logger.debug("Value of column {} ('{}') is equal to specified 'null' value: '{}'".format(col, unicode(row[col]).encode('utf-8'), val)) # There is a match with null value return True # There is no match with null value return False def process(self, count, rows, chunksize): """Process the rows fed to the converter. Count and chunksize are used to determine the current row number (needed for default observation identifiers)""" obs_count = count * chunksize # logger.info("Row: {}".format(obs_count)) #removed for readability # We iterate row by row, and then column by column, as given by the CSVW mapping file. mult_proc_counter = 0 iter_error_counter= 0 for row in rows: # This fixes issue:10 if row is None: mult_proc_counter += 1 # logger.debug( #removed for readability # "Skipping empty row caused by multiprocessing (multiple of chunksize exceeds number of rows in file)...") continue # set the '_row' value in case we need to generate 'default' URIs for each observation () # logger.debug("row: {}".format(obs_count)) #removed for readability row[u'_row'] = obs_count count += 1 # The self.columns dictionary gives the mapping definition per column in the 'columns' # array of the CSVW tableSchema definition. for c in self.columns: c = Item(self.metadata_graph, c) # default about URL s = self.expandURL(self.aboutURLSchema, row) try: # Can also be used to prevent the triggering of virtual # columns! # Get the raw value from the cell in the CSV file value = row[unicode(c.csvw_name)] # This checks whether we should continue parsing this cell, or skip it. if self.isValueNull(value, c): continue # If the null values are specified in an array, we need to parse it as a collection (list) elif isinstance(c.csvw_null, Item): nulls = Collection(self.metadata_graph, BNode(c.csvw_null)) if self.equal_to_null(nulls, row): # Continue to next column specification in this row, if the value is equal to (one of) the null values. continue except: # No column name specified (virtual) because there clearly was no c.csvw_name key in the row. # logger.debug(traceback.format_exc()) #removed for readability iter_error_counter +=1 if isinstance(c.csvw_null, Item): nulls = Collection(self.metadata_graph, BNode(c.csvw_null)) if self.equal_to_null(nulls, row): # Continue to next column specification in this row, if the value is equal to (one of) the null values. continue try: # This overrides the subject resource 's' that has been created earlier based on the # schema wide aboutURLSchema specification. 
if unicode(c.csvw_virtual) == u'true' and c.csvw_aboutUrl is not None: s = self.expandURL(c.csvw_aboutUrl, row) if c.csvw_valueUrl is not None: # This is an object property, because the value needs to be cast to a URL p = self.expandURL(c.csvw_propertyUrl, row) o = self.expandURL(c.csvw_valueUrl, row) if self.isValueNull(os.path.basename(unicode(o)), c): logger.debug("skipping empty value") continue if unicode(c.csvw_virtual) == u'true' and c.csvw_datatype is not None and URIRef(c.csvw_datatype) == XSD.anyURI: # Special case: this is a virtual column with object values that are URIs # For now using a test special property value = row[unicode(c.csvw_name)].encode('utf-8') o = URIRef(iribaker.to_iri(value)) if unicode(c.csvw_virtual) == u'true' and c.csvw_datatype is not None and URIRef(c.csvw_datatype) == XSD.linkURI: about_url = str(c.csvw_aboutUrl) about_url = about_url[about_url.find("{"):about_url.find("}")+1] s = self.expandURL(about_url, row) # logger.debug("s: {}".format(s)) value_url = str(c.csvw_valueUrl) value_url = value_url[value_url.find("{"):value_url.find("}")+1] o = self.expandURL(value_url, row) # logger.debug("o: {}".format(o)) # For coded properties, the collectionUrl can be used to indicate that the # value URL is a concept and a member of a SKOS Collection with that URL. if c.csvw_collectionUrl is not None: collection = self.expandURL(c.csvw_collectionUrl, row) self.g.add((collection, RDF.type, SKOS['Collection'])) self.g.add((o, RDF.type, SKOS['Concept'])) self.g.add((collection, SKOS['member'], o)) # For coded properties, the schemeUrl can be used to indicate that the # value URL is a concept and a member of a SKOS Scheme with that URL. if c.csvw_schemeUrl is not None: scheme = self.expandURL(c.csvw_schemeUrl, row) self.g.add((scheme, RDF.type, SKOS['Scheme'])) self.g.add((o, RDF.type, SKOS['Concept'])) self.g.add((o, SKOS['inScheme'], scheme)) else: # This is a datatype property if c.csvw_value is not None: value = self.render_pattern(unicode(c.csvw_value), row) elif c.csvw_name is not None: # print s # print c.csvw_name, self.encoding # print row[unicode(c.csvw_name)], type(row[unicode(c.csvw_name)]) # print row[unicode(c.csvw_name)].encode('utf-8') # print '...' value = row[unicode(c.csvw_name)].encode('utf-8') else: raise Exception("No 'name' or 'csvw:value' attribute found for this column specification") # If propertyUrl is specified, use it, otherwise use # the column name if c.csvw_propertyUrl is not None: p = self.expandURL(c.csvw_propertyUrl, row) else: if "" in self.metadata_graph.namespaces(): propertyUrl = self.metadata_graph.namespaces()[""][ unicode(c.csvw_name)] else: propertyUrl = "{}{}".format(get_namespaces()['sdv'], unicode(c.csvw_name)) p = self.expandURL(propertyUrl, row) if c.csvw_datatype is not None: if URIRef(c.csvw_datatype) == XSD.anyURI: # The xsd:anyURI datatype will be cast to a proper IRI resource. o = URIRef(iribaker.to_iri(value)) elif URIRef(c.csvw_datatype) == XSD.string and c.csvw_lang is not None: # If it is a string datatype that has a language, we turn it into a # language tagged literal # We also render the lang value in case it is a # pattern. o = Literal(value, lang=self.render_pattern( c.csvw_lang, row)) else: o = Literal(value, datatype=c.csvw_datatype, normalize=False) else: # It's just a plain literal without datatype. 
o = Literal(value) # Add the triple to the assertion graph self.g.add((s, p, o)) # Add provenance relating the propertyUrl to the column id if '@id' in c: self.g.add((p, PROV['wasDerivedFrom'], URIRef(c['@id']))) except: # print row[0], value traceback.print_exc() # We increment the observation (row number) with one obs_count += 1 logger.debug( "{} row skips caused by multiprocessing (multiple of chunksize exceeds number of rows in file)...".format(mult_proc_counter)) logger.debug( "{} errors encountered while trying to iterate over a NoneType...".format(mult_proc_counter)) logger.info("... done") return self.ds.serialize(format=self.output_format) # def serialize(self): # trig_file_name = self.file_name + '.trig' # logger.info("Starting serialization to {}".format(trig_file_name)) # # with open(trig_file_name, 'w') as f: # self.np.serialize(f, format='trig') # logger.info("... done") def render_pattern(self, pattern, row): """Takes a Jinja or Python formatted string, and applies it to the row value""" # Significant speedup by not re-instantiating Jinja templates for every # row. if pattern in self.templates: template = self.templates[pattern] else: template = self.templates[pattern] = Template(pattern) # TODO This should take into account the special CSVW instructions such as {_row} # First we interpret the url_pattern as a Jinja2 template, and pass all # column/value pairs as arguments rendered_template = template.render(**row) try: # We then format the resulting string using the standard Python2 # expressions return rendered_template.format(**row) except: logger.warning( u"Could not apply python string formatting, probably due to mismatched curly brackets. IRI will be '{}'. ".format(rendered_template)) return rendered_template def expandURL(self, url_pattern, row, datatype=False): """Takes a Jinja or Python formatted string, applies it to the row values, and returns it as a URIRef""" url = self.render_pattern(unicode(url_pattern), row) # DEPRECATED # for ns, nsuri in namespaces.items(): # if url.startswith(ns): # url = url.replace(ns + ':', nsuri) # break try: iri = iribaker.to_iri(url) rfc3987.parse(iri, rule='IRI') except: raise Exception(u"Cannot convert `{}` to valid IRI".format(url)) # print "Baked: ", iri return URIRef(iri) def isValueNull(self, value, c): """This checks whether we should continue parsing this cell, or skip it because it is empty or a null value.""" try: if len(value) == 0 and unicode(c.csvw_parseOnEmpty) == u"true": print("Not skipping empty value") return False #because it should not be skipped elif len(value) == 0 or value == unicode(c.csvw_null) or value in [unicode(n) for n in c.csvw_null] or value == unicode(self.schema.csvw_null): # Skip value if length is zero and equal to (one of) the null value(s) logger.debug( "Length is 0 or value is equal to specified 'null' value") return True except: logger.debug("null does not exist or is not a list.") return False
class RDFtoUmlDiagram(): """ Transform a RDF dataset to an UML diagram """ def __init__(self, showObjs, showClasses, namespace): self.ds = Dataset() self.d = UmlPygraphVizDiagram() self.show_objs = showObjs self.show_classes = showClasses self.namespace = namespace self.add_namespaces(self.namespace) def load_rdf(self, filename, input_format=None): if input_format: rdf_format = input_format elif filename is not sys.stdin: format_list = {'.xml': 'xml', '.rdf': 'xml', '.owl': 'xml', '.n3': 'n3', '.ttl': 'turtle', '.nt': 'nt', '.trig': 'trig', '.nq': 'nquads', '': 'turtle'} extension = splitext(filename.name)[1] rdf_format = format_list[extension] else: rdf_format = 'turtle' print("using rdf format: " + rdf_format) temp = self.ds.graph("file://"+filename.name) temp.parse(filename.name, format=rdf_format) def add_namespaces(self, namespaces): if namespaces: for ns in namespaces: self.ds.namespace_manager.bind(ns[0],ns[1]) def start_subgraph(self, graph_name): self.d.start_subgraph(graph_name.strip('[<>]:_')) def add_object_node(self, object_name, classes_name, attributes): self.d.add_object_node(self.ds.namespace_manager.qname(object_name), classes_name, attributes) def add_class_node(self, class_name, attributes): self.d.add_class_node(self.ds.namespace_manager.qname(class_name), attributes) def add_edge(self, src, dst, predicate): self.d.add_edge(self.ds.namespace_manager.qname(src), self.ds.namespace_manager.qname(dst), self.ds.namespace_manager.qname(predicate)) def add_subclass_edge(self, src, dst): self.d.add_subclass_edge(self.ds.namespace_manager.qname(src), self.ds.namespace_manager.qname(dst)) def create_namespace_box(self): # Create Namespace box label = """< <table align="left" cellborder="0"> <tr><td align='center' colspan='2'><b>Namespaces</b></td></tr>""" for ns in sorted(self.ds.namespaces()): label += "<tr><td align='left'>%s:</td><td align='left'>%s</td></tr>" % (ns[0], ns[1] ) label += "</table> >" self.d.set_label(label) def output_dot(self, filename): self.d.write_to_file(filename) def visualize(self, filename): self.d.visualize(filename, self.ds.namespaces()) def create_diagram(self, object_nodes=True, class_nodes=False): # Iterate over all graphs for graph in self.ds.contexts(): graph_name = graph.n3() if graph_name == "[<urn:x-rdflib:default>]": break graph = graph.skolemize() if len(graph) > 0: self.start_subgraph(graph_name) if self.show_objs: self.create_object_nodes(graph) if self.show_classes: self.create_class_nodes(graph) self.d.add_undescribed_nodes() self.create_namespace_box() def create_object_nodes(self, graph): # object nodes query_nodes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?node WHERE { ?node a ?class. FILTER (?class not IN (rdfs:Class, owl:Class, owl:Property, owl:ObjectProperty, owl:DatatypeProperty)) } ORDER BY ?node""" result_nodes = graph.query(query_nodes) for row_nodes in result_nodes: # adding the classes to the node (can be more than one) query_classes = """SELECT DISTINCT ?class WHERE { %s a ?class. } ORDER BY ?class""" % row_nodes['node'].n3() result_classes = graph.query(query_classes) classes = [] for row_classes in result_classes: if not self.show_classes: classes.append(self.ds.namespace_manager.qname(row_classes['class'])) else: self.add_edge(row_nodes['node'], row_classes['class'], "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") # adding the attributes to the node query_attributes = """SELECT DISTINCT ?p ?o WHERE { %s ?p ?o. 
FILTER (isLiteral(?o)) } ORDER BY ?p ?o""" % row_nodes['node'].n3() result_attributes = graph.query(query_attributes) attributes = [] for row_attributes in result_attributes: attributes.append( self.ds.namespace_manager.qname(row_attributes['p']) + " = " + str(row_attributes['o'])) self.add_object_node(row_nodes['node'], ", ".join(classes), attributes) # object node connections query_connections = """SELECT DISTINCT ?c1 ?c2 ?p WHERE { ?c1 ?p ?c2. FILTER (!isLiteral(?c2)) FILTER (?p not IN (rdf:type, rdfs:domain, rdfs:range, rdfs:subClassOf)) } ORDER BY ?c1 ?p ?c2""" result_connections = graph.query(query_connections) for row_connections in result_connections: self.add_edge(row_connections['c1'], row_connections['c2'], row_connections['p']) def create_class_nodes(self, graph): # RDFS stuff query_classes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?class WHERE { ?class a ?c . FILTER (?c in (rdfs:Class, owl:Class)) } ORDER BY ?class""" result_classes = graph.query(query_classes) for row_classes in result_classes: query_datatype_property = """ PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> SELECT DISTINCT ?property ?range WHERE { ?property rdfs:domain %s; a owl:DatatypeProperty. OPTIONAL{ ?property rdfs:range ?range. } } ORDER BY ?property""" % row_classes['class'].n3() result_datatype_property = graph.query(query_datatype_property) attributes = [] for r in result_datatype_property: text = self.ds.namespace_manager.qname(r['property']) if r['range']: text += " = " + self.ds.namespace_manager.qname(r['range']) attributes.append(text) self.add_class_node(row_classes['class'], attributes) query_object_property = """SELECT DISTINCT ?src ?dest ?property WHERE { ?property a <http://www.w3.org/2002/07/owl#ObjectProperty>; rdfs:domain ?src; rdfs:range ?dest. } ORDER BY ?src ?property ?dest""" result_object_property = graph.query(query_object_property) for row_object_property in result_object_property: self.add_edge(row_object_property['src'], row_object_property['dest'], row_object_property['property']) query_subclass = """SELECT DISTINCT ?src ?dest WHERE { ?src rdfs:subClassOf ?dest. } ORDER BY ?src ?dest""" result_subclass = graph.query(query_subclass) for row_subclass in result_subclass: self.add_subclass_edge(row_subclass['src'], row_subclass['dest'])
#!/usr/bin/python from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD import rdflib.resource from provmodified import Entity import provmodified as prov import json import subprocess, shlex import collections DOCKER = Namespace("http://www.example.org/ns/docker#") PROV = Namespace("http://www.w3.org/ns/prov#") ds = Dataset(default_union=True) ds.bind("docker", DOCKER) ds.bind("prov", PROV) default_graph = ds def bind_ns(prefix, namespace): ds.namespace_manager.bind(prefix, Namespace(namespace)) def parse_json_byfile(filename): with open(filename) as data_file: data = json.load(data_file) return data[0] def inspect_json(cmd): # print cmd p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD import rdflib.resource """ @newfield iri: IRI """ PROV = Namespace("http://www.w3.org/ns/prov#") ds = Dataset(default_union=True) ds.bind("prov", PROV) default_graph = ds #print type(default_graph) config = { "useInverseProperties": False } def set_use_inverse_properties(flag=False): config["useInverseProperties"] = flag def using_inverse_properties(): return config["useInverseProperties"] def clear_graph(bundle=default_graph): bundle.remove((None, None, None))
LOCAL_STORE = config.LOCAL_STORE LOCAL_FILE = config.LOCAL_FILE SPARQL_ENDPOINT_MAPPING = config.SPARQL_ENDPOINT_MAPPING SPARQL_ENDPOINT = config.SPARQL_ENDPOINT DEFAULT_BASE = config.DEFAULT_BASE QUERY_RESULTS_LIMIT = config.QUERY_RESULTS_LIMIT CUSTOM_PARAMETERS = config.CUSTOM_PARAMETERS labels = {} g = Dataset() if LOCAL_STORE: log.info("Loading local file: {}".format(LOCAL_FILE)) try: format = rdflib.util.guess_format(LOCAL_FILE) g.load(LOCAL_FILE, format=format) except: log.error(traceback.format_exc()) raise Exception("Cannot guess file format for {} or could not load file".format(LOCAL_FILE)) def visit(url, format='html'): log.debug("Starting query") if LOCAL_STORE:
print header with open(filename,'r') as csvfile: csv_contents = [{k: v for k, v in row.items()} for row in csv.DictReader(csvfile, skipinitialspace=True, quotechar='"', delimiter=',')] return csv_contents #//*************** csv parser ****************//# graph_uri_base = resource path = 'source_datasets/' filename_population = 'all_population_by_type.csv' filename_unemployment = 'unemployment_eu.csv' filename_inflow = 'inflow_dataset.csv' filename_asylum = 'asylum_seekers.csv' dataset = Dataset() dataset.bind('mpr', RESOURCE) dataset.bind('mpo', VOCAB) dataset.bind('geo', GEO) dataset.bind('geo_country_code', GCC) dataset.bind('dbo', DBO) dataset.bind('dbr', DBR) dataset.bind('sdmx', SDMX) dataset.default_context.parse(VOCAB_FILE, format='turtle') dataset, unemployment_eu_graph = convert_unemployment_csv(filename_unemployment,dataset,URIRef(graph_uri_base + 'unemployment_eu_graph')) dataset, population_eu_graph = convert_population_csv(filename_population,dataset,URIRef(graph_uri_base + 'population_eu_graph')) dataset, inflow_graph = convert_inflow_csv(filename_inflow,dataset,URIRef(graph_uri_base + 'inflow_graph'))
def data_structure_definition(profile, dataset_name, dataset_base_uri,
                              variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication
    with provenance annotations) that contains the data structure definition
    (from the DataCube vocabulary) and the mappings to external datasets.

    Arguments:
    dataset_name     -- the name of the dataset
    dataset_base_uri -- the base URI under which the nanopublication URIs are minted
    variables        -- the list of dictionaries with the variables and their mappings to URIs
    profile          -- the Google signin profile
    source_path      -- the path to the dataset file that was annotated
    source_hash      -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace("{}/".format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind("qbrv", QBRV)
    rdf_dataset.bind("qbr", QBR)
    rdf_dataset.bind("qb", QB)
    rdf_dataset.bind("skos", SKOS)
    rdf_dataset.bind("prov", PROV)
    rdf_dataset.bind("np", NP)
    rdf_dataset.bind("foaf", FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + "/" + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE["assertion/" + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE["provenance/" + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE["pubinfo/" + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR["person/" + profile["email"]]

    rdf_dataset.add((author_uri, RDF.type, FOAF["Person"]))
    rdf_dataset.add((author_uri, FOAF["name"], Literal(profile["name"])))
    rdf_dataset.add((author_uri, FOAF["email"], Literal(profile["email"])))
    rdf_dataset.add((author_uri, QBRV["googleId"], Literal(profile["id"])))
    try:
        rdf_dataset.add((author_uri, FOAF["depiction"], URIRef(profile["image"])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV["path"],
                     Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV["sha1_hash"],
                     Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE["nanopublication/" + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP["Nanopublication"]))
    rdf_dataset.add((nanopublication_uri, NP["hasAssertion"], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP["Assertion"]))
    rdf_dataset.add((nanopublication_uri, NP["hasProvenance"], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP["Provenance"]))
    rdf_dataset.add((nanopublication_uri, NP["hasPublicationInfo"], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP["PublicationInfo"]))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add((assertion_graph_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((dataset_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((assertion_graph_uri, PROV["generatedAtTime"],
                          Literal(timestamp, datatype=XSD.datetime)))
    provenance_graph.add((assertion_graph_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef("https://github.com/CLARIAH/qber.git")

    pubinfo_graph.add((nanopublication_uri, PROV["wasGeneratedBy"], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV["generatedAtTime"],
                       Literal(timestamp, datatype=XSD.datetime)))
    pubinfo_graph.add((nanopublication_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The assertion graph
    # ----
    structure_uri = BASE["structure"]

    assertion_graph.add((dataset_uri, RDF.type, QB["DataSet"]))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add((structure_uri, RDF.type, QB["DataStructureDefinition"]))
    assertion_graph.add((dataset_uri, QB["structure"], structure_uri))

    for variable_id, variable in variables.items():
        variable_uri = URIRef(variable["original"]["uri"])
        variable_label = Literal(variable["original"]["label"])
        variable_type = URIRef(variable["type"])

        codelist_uri = URIRef(variable["codelist"]["original"]["uri"])
        codelist_label = Literal(variable["codelist"]["original"]["label"])

        # The variable as component of the definition
        component_uri = safe_url(BASE, "component/" + variable["original"]["label"])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB["component"], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if "description" in variable and variable["description"] != "":
            assertion_graph.add((variable_uri, RDFS.comment,
                                 Literal(variable["description"])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable["uri"] != str(variable_uri):
            assertion_graph.add((variable_uri, RDFS["subPropertyOf"],
                                 URIRef(variable["uri"])))

        if variable_type == QB["DimensionProperty"]:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["dimension"], variable_uri))

            # Coded variables are also of type coded property
            # (a subproperty of dimension property)
            if variable["category"] == "coded":
                assertion_graph.add((variable_uri, RDF.type, QB["CodedProperty"]))

        elif variable_type == QB["MeasureProperty"]:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["measure"], variable_uri))

        elif variable_type == QB["AttributeProperty"]:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["attribute"], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs for
        # each variable (including mappings between value uris and etc....)
        if variable["category"] == "coded":
            assertion_graph.add((codelist_uri, RDF.type, SKOS["Collection"]))
            assertion_graph.add((codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB["codeList"], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable["codelist"]["uri"] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV["wasDerivedFrom"],
                                     URIRef(variable["codelist"]["uri"])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS["member"], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "identifier":
            # Generate a SKOS concept for each of the values
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], value_label))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "other":
            # Generate a literal for each of the values when converting the dataset
            # (but not here)
            pass

    return rdf_dataset
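A minimal usage sketch for the function above; the profile dict, dataset name, paths, and hash are illustrative placeholders, not values from the original project:

    # Hypothetical inputs, for illustration only.
    profile = {"email": "author@example.org", "name": "A. Author", "id": "1234567890"}

    nanopub = data_structure_definition(
        profile,
        dataset_name="example-dataset",
        dataset_base_uri="http://example.org/example-dataset",
        variables={},  # no variables: only the skeleton nanopublication is produced
        source_path="data/example-dataset.csv",
        source_hash="0123456789abcdef")

    # The returned Dataset holds the assertion, provenance and pubinfo graphs;
    # TriG preserves the named-graph structure when serializing.
    print(nanopub.serialize(format='trig'))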
def query_test(t):
    uri, name, comment, data, graphdata, query, resfile, syntax = t

    # the query-eval tests refer to graphs to load by resolvable filenames
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = True

    if uri in skiptests:
        raise SkipTest()

    def skip(reason='(none)'):
        print "Skipping %s from now on." % uri
        f = open("skiptests.list", "a")
        f.write("%s\t%s\n" % (uri, reason))
        f.close()

    try:
        g = Dataset()
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x in graphdata:
                g.load(x, format=_fmt(x))

        if not resfile:
            # no result - syntax test
            if syntax:
                translateQuery(parseQuery(
                    open(query[7:]).read()), base=urljoin(query, '.'))
            else:
                # negative syntax test
                try:
                    translateQuery(parseQuery(
                        open(query[7:]).read()), base=urljoin(query, '.'))

                    assert False, 'Query should not have parsed!'
                except:
                    pass  # it's fine - the query should not parse
            return

        # eval test - carry out query
        res2 = g.query(open(query[7:]).read(), base=urljoin(query, '.'))

        if resfile.endswith('ttl'):
            resg = Graph()
            resg.load(resfile, format='turtle', publicID=resfile)
            res = RDFResultParser().parse(resg)
        elif resfile.endswith('rdf'):
            resg = Graph()
            resg.load(resfile, publicID=resfile)
            res = RDFResultParser().parse(resg)
        elif resfile.endswith('srj'):
            res = Result.parse(open(resfile[7:]), format='json')
        elif resfile.endswith('tsv'):
            res = Result.parse(open(resfile[7:]), format='tsv')
        elif resfile.endswith('csv'):
            res = Result.parse(open(resfile[7:]), format='csv')

            # CSV is lossy, round-trip our own resultset to
            # lose the same info :)

            # write bytes, read strings...
            s = BytesIO()
            res2.serialize(s, format='csv')
            print s.getvalue()
            s = StringIO(s.getvalue().decode('utf-8'))  # hmm ?
            res2 = Result.parse(s, format='csv')
        else:
            res = Result.parse(open(resfile[7:]), format='xml')

        if not DETAILEDASSERT:
            eq(res.type, res2.type, 'Types do not match')
            if res.type == 'SELECT':
                eq(set(res.vars), set(res2.vars), 'Vars do not match')
                comp = bindingsCompatible(
                    set(res),
                    set(res2)
                )
                assert comp, 'Bindings do not match'
            elif res.type == 'ASK':
                eq(res.askAnswer, res2.askAnswer, 'Ask answer does not match')
            elif res.type in ('DESCRIBE', 'CONSTRUCT'):
                assert isomorphic(
                    res.graph, res2.graph), 'graphs are not isomorphic!'
            else:
                raise Exception('Unknown result type: %s' % res.type)
        else:
            eq(res.type, res2.type,
               'Types do not match: %r != %r' % (res.type, res2.type))
            if res.type == 'SELECT':
                eq(set(res.vars), set(res2.vars),
                   'Vars do not match: %r != %r' % (
                       set(res.vars), set(res2.vars)))
                assert bindingsCompatible(
                    set(res),
                    set(res2)
                ), 'Bindings do not match: \n%s\n!=\n%s' % (
                    res.serialize(format='txt',
                                  namespace_manager=g.namespace_manager),
                    res2.serialize(format='txt',
                                   namespace_manager=g.namespace_manager))
            elif res.type == 'ASK':
                eq(res.askAnswer, res2.askAnswer,
                   "Ask answer does not match: %r != %r" % (
                       res.askAnswer, res2.askAnswer))
            elif res.type in ('DESCRIBE', 'CONSTRUCT'):
                assert isomorphic(
                    res.graph, res2.graph), 'graphs are not isomorphic!'
            else:
                raise Exception('Unknown result type: %s' % res.type)

    except Exception, e:

        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not resfile:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x in graphdata:
                    print ">>>", x
                    print open(x[7:]).read()

            print "----------------- Query -------------------"
            print ">>>", query
            print open(query[7:]).read()
            if resfile:
                print "----------------- Res -------------------"
                print ">>>", resfile
                print open(resfile[7:]).read()

            try:
                pq = parseQuery(open(query[7:]).read())
                print "----------------- Parsed ------------------"
                pprintAlgebra(translateQuery(pq, base=urljoin(query, '.')))
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
            # pdb.set_trace()
            # nose.tools.set_trace()
        raise
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")

        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
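As a rough sketch of how such a test case might be driven (the subclass name is hypothetical; the real suite generates one subclass per configured store):

    # Sketch: bind the test case above to the in-memory store and run it with unittest.
    import unittest


    class MemoryDatasetTestCase(DatasetTestCase):
        store = 'default'   # rdflib's in-memory store


    if __name__ == '__main__':
        unittest.main()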
from flask import Flask          # added: Flask is used below but was not imported in the fragment
from rdflib import Dataset       # added: Dataset is used below but was not imported in the fragment
import flask_rdf
from flask_rdf.flask import returns_rdf
from flask_restful import Resource, Api

app = Flask(__name__)
api = Api(app)

# set up a custom formatter that returns JSON-LD by default
custom_formatter = flask_rdf.FormatSelector()
custom_formatter.wildcard_mimetype = 'application/ld+json'
custom_formatter.add_format('application/ld+json', 'json-ld')
custom_decorator = flask_rdf.flask.Decorator(custom_formatter)

ds = Dataset(default_union=True)
with open('./dectectorfinalstate.owl', "r") as f:
    result = ds.parse(f, format="application/rdf+xml")


class HelloWorld(Resource):
    @custom_decorator
    def get(self):
        return ds


api.add_resource(HelloWorld, '/detectorfinalstate')


@app.route("/")
def main():
    # This is cached, so for development it is better
def visit_sparql(url, format='html', depth=1):
    sparqls = get_sparql_endpoints(url)
    predicates = get_predicates(sparqls, url)

    if format == 'html':
        limit_fraction = QUERY_RESULTS_LIMIT / 3
        if len(predicates) > 1:
            predicate_query_limit_fraction = (
                limit_fraction * 2) / len(predicates)
        else:
            predicate_query_limit_fraction = limit_fraction * 2

        results = []

        def predicate_specific_sparql(sparql, query):
            log.debug(query)

            sparql.setQuery(query)
            res = sparql.query().convert()
            results.extend(list(res["results"]["bindings"]))

        threads = []
        local_results = []
        for p in predicates:
            q = u"""SELECT DISTINCT ?s ?p ?o ?g WHERE {{
                {{
                    GRAPH ?g {{
                        {{
                            <{url}> <{predicate}> ?o .
                            BIND(<{url}> as ?s)
                            BIND(<{predicate}> as ?p)
                        }} UNION {{
                            ?s <{predicate}> <{url}>.
                            BIND(<{url}> as ?o)
                            BIND(<{predicate}> as ?p)
                        }}
                    }}
                }} UNION {{
                    {{
                        <{url}> <{predicate}> ?o .
                        BIND(<{url}> as ?s)
                        BIND(<{predicate}> as ?p)
                    }} UNION {{
                        ?s <{predicate}> <{url}>.
                        BIND(<{url}> as ?o)
                        BIND(<{predicate}> as ?p)
                    }}
                }}
            }} LIMIT {limit}""".format(url=url, predicate=p,
                                       limit=predicate_query_limit_fraction)

            for s in sparqls:
                # Start processes for each endpoint, for each predicate query
                process = Thread(target=predicate_specific_sparql, args=[s, q])
                process.start()
                threads.append(process)

        url_is_predicate_query = u"""SELECT DISTINCT ?s ?p ?o ?g WHERE {{
            {{
                GRAPH ?g {{
                    ?s <{url}> ?o.
                    BIND(<{url}> as ?p)
                }}
            }} UNION {{
                ?s <{url}> ?o.
                BIND(<{url}> as ?p)
            }}
        }} LIMIT {limit}""".format(url=url, limit=limit_fraction)

        for s in sparqls:
            process = Thread(target=predicate_specific_sparql,
                             args=[s, url_is_predicate_query])
            process.start()
            threads.append(process)

        # We now pause execution on the main thread by 'joining' all of our
        # started threads. This ensures that each has finished processing the urls.
        for process in threads:
            process.join()

        if LDF_STATEMENTS_URL is not None:
            retrieve_ldf_results(url)

        # We also add local results (result of dereferencing)
        local_results = list(visit_local(url, format))

        results.extend(local_results)

        # If a Druid statements URL is specified, we'll try to receive it as
        # well
        if DRUID_STATEMENTS_URL is not None:
            results.extend(visit_druid(url, format))

        if depth > 1:
            # If depth is larger than 1, we proceed to extend the results with
            # the results of visiting all object resources for every triple in
            # the resultset.
            newresults = []

            objects = set([r['o']['value'] for r in results
                           if r['o']['value'] != url and r['o']['type'] == 'uri'])
            for o in objects:
                newresults.extend(visit(o, format=format, depth=depth - 1))

            results.extend(newresults)

    else:
        q = u"""
        CONSTRUCT {{
            ?s ?p ?o .
        }} WHERE {{
            {{
                GRAPH ?g {{
                    {{
                        <{url}> ?p ?o .
                        BIND(<{url}> as ?s)
                    }} UNION {{
                        ?s ?p <{url}>.
                        BIND(<{url}> as ?o)
                    }} UNION {{
                        ?s <{url}> ?o.
                        BIND(<{url}> as ?p)
                    }}
                }}
            }} UNION {{
                {{
                    <{url}> ?p ?o .
                    BIND(<{url}> as ?s)
                }} UNION {{
                    ?s ?p <{url}>.
                    BIND(<{url}> as ?o)
                }} UNION {{
                    ?s <{url}> ?o.
                    BIND(<{url}> as ?p)
                }}
            }}
        }} LIMIT {limit}""".format(url=url, limit=QUERY_RESULTS_LIMIT)

        result_dataset = Dataset()

        for s in sparqls:
            s.setQuery(q)
            s.setReturnFormat(XML)

            result_dataset += s.query().convert()

        if format == 'jsonld':
            results = result_dataset.serialize(format='json-ld')
        elif format == 'rdfxml':
            s.setReturnFormat(XML)
            results = result_dataset.serialize(format='pretty-xml')
        elif format == 'turtle':
            s.setReturnFormat(XML)
            results = result_dataset.serialize(format='turtle')
        else:
            results = 'Nothing'

    log.debug("Received results")

    return results
def createNanopubs(g):
    ds = Dataset()
    ds.namespace_manager.bind("ddi", "http://dbmi-icode-01.dbmi.pitt.edu/mp/")
    ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")
    ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
    ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#")
    ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/")
    ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")
    ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#")
    ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/")
    ds.namespace_manager.bind("mp", "http://purl.org/mp/")
    ds.namespace_manager.bind("prov", "http://www.w3.org/ns/prov#")
    ds.namespace_manager.bind("dikbEvidence",
                              "http://dbmi-icode-01.dbmi.pitt.edu/dikb-evidence/DIKB_evidence_ontology_v1.3.owl#")

    bindings = g.query(interactSelect)
    for b in bindings:
        asIndex = b['a'].decode('utf-8').rfind('-')
        identifier = b['a'].decode('utf-8')[asIndex:]
        predicateType = b['t'].decode('utf-8')

        # format the identifier into the URI string before constructing the URIRef
        npURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-nanopub%s' % identifier)
        headURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-head%s' % identifier)
        pubInfoURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-pubInfo%s' % identifier)
        provURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-provenance%s' % identifier)
        aURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion%s' % identifier)

        ds.add((aURI, RDF.type, np.assertion))

        head = ds.add_graph(headURI)
        head.add((npURI, RDF.type, np['Nanopublication']))
        head.add((provURI, RDF.type, np['Provenance']))
        head.add((pubInfoURI, RDF.type, np['PublicationInfo']))
        head.add((npURI, np['hasAssertion'], aURI))
        head.add((npURI, np['hasProvenance'], provURI))
        head.add((npURI, np['hasPublicationInfo'], pubInfoURI))

        pub = ds.add_graph(pubInfoURI)
        pub.add((npURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
        pub.add((npURI, prov.generatedAtTime, Literal(datetime.now())))

        if(predicateType == "http://purl.obolibrary.org/obo/DIDEO_00000000"):
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))

        elif(predicateType == "http://purl.obolibrary.org/obo/DIDEO_00000096"):
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_CT_PK_Genotype))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_CT_PK_Phenotype))

        elif(predicateType == "http://purl.obolibrary.org/obo/RO_0002449"):
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))

    print ds.serialize(format='trig')
'''This script initializes the data file that contains the triples'''
from rdflib import Graph, Dataset
from shutil import rmtree
from os.path import exists
from model import update_metagraph

DATAPATH = 'data'
DOMAIN = 'http://abstractnonsense.net/'
GRAPH_NAMESPACE = DOMAIN + 'graph' + '/'
DEFAULT_URI = DOMAIN + 'i'
DEFAULT_GRAPH = GRAPH_NAMESPACE + 'i'


def remove_data(datapath):
    '''SIDE EFFECTS'''
    if exists(datapath):
        rmtree(datapath)
    return None


ds = Dataset(store='Sleepycat')
remove_data(DATAPATH)
ds.open('data', create=True)
g = ds.get_context(identifier=DEFAULT_GRAPH)
g.parse('foaf.ttl', format='n3')
update_metagraph(DEFAULT_GRAPH, DEFAULT_URI, ds)
ds.close()
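A small follow-up sketch, assuming the constants defined in the script above, showing how the freshly initialized Sleepycat store could be re-opened and inspected:

    # Re-open the Sleepycat store created above and inspect the named graph (sketch).
    from rdflib import Dataset

    ds = Dataset(store='Sleepycat')
    ds.open(DATAPATH, create=False)
    g = ds.get_context(identifier=DEFAULT_GRAPH)
    print(len(g))   # number of triples parsed from foaf.ttl
    ds.close()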
#             line[str] = strip_tags(line[str])
#             line[str] = unicode(line[str], errors='replace')
#     #print line
    with open(filename, 'r') as csvfile:
        csv_contents = [{k: v for k, v in row.items()}
                        for row in csv.DictReader(csvfile,
                                                  skipinitialspace=True,
                                                  quotechar='"',
                                                  delimiter=',')]
    return csv_contents

#//*************** csv parser ****************//#


graph_uri_base = resource + 'movement_of_people/'

path = 'source_datasets/'
filename = 'Movement_of_people_across_borders_dataset.csv'

dataset = Dataset()
dataset.bind('trumpres', RESOURCE)
dataset.bind('trumpvoc', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)
dataset.default_context.parse(VOCAB_FILE, format='turtle')

dataset, movement_graph = convert_csv(path + filename, dataset,
                                      URIRef(graph_uri_base + 'movement_graph'))

serialize_upload(OUTPUT_DIR + 'movement_of_people.trig', dataset)

### Generate VoID metadata
from rdflib.void import generateVoID
    response = requests.post(transaction_close_url)
    return str(response.status_code)


def serialize_upload(filename, dataset, upload=True):
    with open(filename, 'w') as f:
        dataset.serialize(f, format='trig')
    upload_to_stardog(dataset.serialize(format='trig'))


graph_uri_base = resource + 'findaslot/'

drop_stardog()

dataset = Dataset()
dataset.bind('fasdat', RESOURCE)
dataset.bind('fasont', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)
dataset.default_context.parse(VOCAB_FILE, format='turtle')

# Upload vocabulary
with open(VOCAB_FILE, 'r') as f:
    upload_to_stardog(f.read())

dataset, t_graph = convert_dataset(
    SOURCE_DATA_DIR + 'Theater.json', dataset,
    URIRef(graph_uri_base + 'theaters'), museums=False)
serialize_upload(OUTPUT_DIR + 'theaters.trig', t_graph)
def update_test(t):

    # the update-eval tests refer to graphs on http://example.org
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = False

    uri, name, comment, data, graphdata, query, res, syntax = t

    if uri in skiptests:
        raise SkipTest()

    try:
        g = Dataset()

        if not res:
            if syntax:
                translateUpdate(parseUpdate(open(query[7:])))
            else:
                try:
                    translateUpdate(parseUpdate(open(query[7:])))
                    raise AssertionError("Query shouldn't have parsed!")
                except:
                    pass  # negative syntax test
            return

        resdata, resgraphdata = res

        # read input graphs
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x, l in graphdata:
                g.load(x, publicID=URIRef(l), format=_fmt(x))

        req = translateUpdate(parseUpdate(open(query[7:])))
        evalUpdate(g, req)

        # read expected results
        resg = Dataset()
        if resdata:
            resg.default_context.load(resdata, format=_fmt(resdata))

        if resgraphdata:
            for x, l in resgraphdata:
                resg.load(x, publicID=URIRef(l), format=_fmt(x))

        eq(set(x.identifier for x in g.contexts() if x != g.default_context),
           set(x.identifier for x in resg.contexts()
               if x != resg.default_context),
           'named graphs in datasets do not match')
        assert isomorphic(g.default_context, resg.default_context), \
            'Default graphs are not isomorphic'

        for x in g.contexts():
            if x == g.default_context:
                continue
            assert isomorphic(x, resg.get_context(x.identifier)), \
                "Graphs with ID %s are not isomorphic" % x.identifier

    except Exception, e:

        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not res:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x, l in graphdata:
                    print ">>>", x, l
                    print open(x[7:]).read()

            print "----------------- Request -------------------"
            print ">>>", query
            print open(query[7:]).read()

            if res:
                if resdata:
                    print "----------------- RES DATA --------------------"
                    print ">>>", resdata
                    print open(resdata[7:]).read()
                if resgraphdata:
                    print "----------------- RES GRAPHDATA -------------------"
                    for x, l in resgraphdata:
                        print ">>>", x, l
                        print open(x[7:]).read()

            print "------------- MY RESULT ----------"
            print g.serialize(format='trig')

            try:
                pq = translateUpdate(parseUpdate(open(query[7:]).read()))
                print "----------------- Parsed ------------------"
                pprintAlgebra(pq)
                # print pq
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
        raise
from rdflib import Graph, ConjunctiveGraph, Dataset, URIRef, Namespace, Literal
from posixpath import join
from uuid import uuid4
from datetime import datetime
from helper import quote, unquote, url_exists

DATAPATH = 'data'
HTTP = 'http://'
DOMAIN = 'abstractnonsense.net'
STORE = 'Sleepycat'
NAMESPACE = Namespace(join(HTTP, DOMAIN, ''))

ds = Dataset(store=STORE)
ds.open(DATAPATH, create=False)  # it stays open all the time, just commits are made
cg = ConjunctiveGraph(store=STORE)
cg.open(DATAPATH, create=False)
# cg.bind('foaf', 'http://xmlns.com/foaf/0.1/')  # FOAF namespace understood

# DBPedia workaround
from rdflib.plugin import register, Parser
register('text/rdf+n3', Parser, 'rdflib.plugins.parsers.notation3', 'N3Parser')


def start():
    '''This starts the background script.

    The background script uses a (currently) hardcoded pattern, according to
    which the script harvests data. It recursively gathers more and more data,
    but only to a finite depth.
from oldman import ClientResourceManager, parse_graph_safely, SPARQLDataStore
from oldman.rest.crud import HashLessCRUDer

logging.config.fileConfig(path.join(path.dirname(__file__), 'logging.ini'))

sesame_iri = "http://*****:*****@context": [
    {
    response = requests.post(transaction_close_url)
    return str(response.status_code)


def serialize_upload(filename, dataset, upload=True):
    with open(filename, 'w') as f:
        dataset.serialize(f, format='trig')
    upload_to_stardog(dataset.serialize(format='trig'))


graph_uri_base = resource + 'findaslot/'

drop_stardog()

dataset = Dataset()
dataset.bind('fasdat', RESOURCE)
dataset.bind('fasont', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)

dataset, t_graph = convert_dataset(
    SOURCE_DATA_DIR + 'Theater.json', dataset,
    URIRef(graph_uri_base + 'theaters'))
serialize_upload(OUTPUT_DIR + 'theaters.trig', dataset)
dataset.remove_graph(t_graph)

dataset, mg_graph = convert_dataset(
    SOURCE_DATA_DIR + 'MuseaGalleries.json', dataset,
    URIRef(graph_uri_base + 'museums'))
serialize_upload(OUTPUT_DIR + 'museums.trig', dataset)
dataset.remove_graph(mg_graph)