def get_step_triples(update_graph, uri, step_def, debug=True):
    """
    Return the triples matching the criteria defined in the current step of an update
    :param update_graph: the update graph
    :param uri: uri of the entity currently the subject of an update
    :param step_def: step definition from update_def
    :return: Graph containing one or more triples that match the criteria for the step
    """
    from rdflib import Graph
    from vivopump import vivo_query, add_qualifiers, make_rdf_term
    if 'qualifier' not in step_def['object']:
        g = update_graph.triples((uri, step_def['predicate']['ref'], None))
    else:
        q = 'select (?' + step_def['object']['name'] + ' as ?o) where { <' + str(uri) + '> <' + \
            str(step_def['predicate']['ref']) + '> ?' + step_def['object']['name'] + ' .\n' + \
            add_qualifiers([step_def]) + ' }\n'
        if debug:
            print "\nStep Triples Query\n", q
        result_set = vivo_query(q)
        g = Graph()
        for binding in result_set['results']['bindings']:
            o = make_rdf_term(binding['o'])
            g.add((uri, step_def['predicate']['ref'], o))
        if debug:
            print "Step Triples", len(g)
    return g
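A minimal sketch of how get_step_triples might be called, assuming the vivopump module is importable; the step_def below only uses the keys the function actually reads ('predicate'/'ref' and 'object'/'name'), and the URIs are illustrative, not taken from the VIVO source.

from rdflib import Graph, URIRef

update_graph = Graph()  # would normally already hold the entity's current triples
person_uri = URIRef('http://vivo.example.org/individual/n123')  # hypothetical entity URI
step_def = {
    'predicate': {'ref': URIRef('http://vivoweb.org/ontology/core#overview')},  # hypothetical predicate
    'object': {'name': 'overview'}  # no 'qualifier' key, so the update graph is scanned directly
}
for triple in get_step_triples(update_graph, person_uri, step_def, debug=False):
    print triple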
def generate_productlist(config, data):
    g = Graph('IOMemory', BNode())
    for product_data in data['products']:
        product = URIRef(link(product_data['link']) + "#subject")
        add_lang_names(g, product, product_data['names'], rel=[SCHEMA.name, DCTERMS.title])
        g.add((product, RDF.type, SCHEMA.CreativeWork))
    return g
def test_spatial_wrong_geometries(self):
    g = Graph()
    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))
    spatial_uri = URIRef('http://geonames/Newark')
    g.add((dataset, DCT.spatial, spatial_uri))
    g.add((spatial_uri, RDF.type, DCT.Location))
    g.add((spatial_uri, LOCN.geometry, Literal('Not GeoJSON', datatype=GEOJSON_IMT)))
    g.add((spatial_uri, LOCN.geometry, Literal('Not WKT', datatype=GSP.wktLiteral)))
    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g
    datasets = [d for d in p.datasets()]
    extras = self._extras(datasets[0])
    assert_true('spatial' not in extras)
def test_cax_dw():
    """
    Test cax-dw rule for OWL 2 RL.

    If::

        T(?c1, owl:disjointWith, ?c2)
        T(?x, rdf:type, ?c1)
        T(?x, rdf:type, ?c2)

    then::

        false
    """
    g = Graph()
    x = T.x
    c1 = T.c1
    c2 = T.c2
    g.add((c1, OWL.disjointWith, c2))
    g.add((x, RDF.type, c1))
    g.add((x, RDF.type, c2))
    owlrl.DeductiveClosure(owlrl.OWLRL_Semantics).expand(g)
    result = next(g.objects(predicate=DAML.error))
    expected = Literal(
        'Disjoint classes http://test.org/c1 and http://test.org/c2'
        ' have a common individual http://test.org/x'
    )
    assert expected == result
def generate_artistlist(config, data):
    g = Graph('IOMemory', BNode())
    for artist_data in data['artists']:
        artist = URIRef(link(artist_data['link']) + "#subject")
        add_lang_names(g, artist, artist_data['names'], rel=[FOAF.name])
        g.add((artist, RDF.type, SCHEMA.MusicGroup))
    return g
def test_spatial_one_dct_spatial_instance_no_uri(self):
    g = Graph()
    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))
    location_ref = BNode()
    g.add((dataset, DCT.spatial, location_ref))
    g.add((location_ref, RDF.type, DCT.Location))
    g.add((location_ref, LOCN.geometry,
           Literal('{"type": "Point", "coordinates": [23, 45]}', datatype=GEOJSON_IMT)))
    g.add((location_ref, SKOS.prefLabel, Literal('Newark')))
    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g
    datasets = [d for d in p.datasets()]
    extras = self._extras(datasets[0])
    assert_true('spatial_uri' not in extras)
    eq_(extras['spatial_text'], 'Newark')
    eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
def test_spatial_both_geojson_and_wkt(self):
    g = Graph()
    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))
    spatial_uri = URIRef('http://geonames/Newark')
    g.add((dataset, DCT.spatial, spatial_uri))
    g.add((spatial_uri, RDF.type, DCT.Location))
    g.add((spatial_uri, LOCN.geometry,
           Literal('{"type": "Point", "coordinates": [23, 45]}', datatype=GEOJSON_IMT)))
    g.add((spatial_uri, LOCN.geometry, Literal('POINT (67 89)', datatype=GSP.wktLiteral)))
    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g
    datasets = [d for d in p.datasets()]
    extras = self._extras(datasets[0])
    eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
def testGraphAdd(self):
    g1 = Graph()
    g2 = Graph()
    tarek = self.tarek
    # michel = self.michel
    bob = self.bob
    likes = self.likes
    # hates = self.hates
    pizza = self.pizza
    cheese = self.cheese
    g1.add((tarek, likes, pizza))
    g2.add((bob, likes, cheese))
    g3 = g1 + g2
    self.assertEquals(len(g3), 2)
    self.assertEquals((tarek, likes, pizza) in g3, True)
    self.assertEquals((tarek, likes, cheese) in g3, False)
    self.assertEquals((bob, likes, cheese) in g3, True)
    g1 += g2
    self.assertEquals(len(g1), 2)
    self.assertEquals((tarek, likes, pizza) in g1, True)
    self.assertEquals((tarek, likes, cheese) in g1, False)
    self.assertEquals((bob, likes, cheese) in g1, True)
def test_distribution_format_format_normalized(self):
    g = Graph()
    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))
    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal('text/csv')))
    g.add((distribution1_1, DCT['format'], Literal('Comma Separated Values')))
    g.add((dataset1, DCAT.distribution, distribution1_1))
    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g
    datasets = [d for d in p.datasets()]
    resource = datasets[0]['resources'][0]
    if toolkit.check_ckan_version(min_version='2.3'):
        eq_(resource['format'], u'CSV')
        eq_(resource['mimetype'], u'text/csv')
    else:
        eq_(resource['format'], u'Comma Separated Values')
def testConjunction(self):
    self.addStuffInMultipleContexts()
    triple = (self.pizza, self.likes, self.pizza)
    # add to context 1
    graph = Graph(self.graph.store, self.c1)
    graph.add(triple)
    self.assertEquals(len(self.graph), len(graph))
def testSub(self):
    g1 = self.graph
    g2 = Graph(store=g1.store)
    tarek = self.tarek
    # michel = self.michel
    bob = self.bob
    likes = self.likes
    # hates = self.hates
    pizza = self.pizza
    cheese = self.cheese
    g1.add((tarek, likes, pizza))
    g1.add((bob, likes, cheese))
    g2.add((bob, likes, cheese))
    g3 = g1 - g2
    self.assertEquals(len(g3), 1)
    self.assertEquals((tarek, likes, pizza) in g3, True)
    self.assertEquals((tarek, likes, cheese) in g3, False)
    self.assertEquals((bob, likes, cheese) in g3, False)
    g1 -= g2
    self.assertEquals(len(g1), 1)
    self.assertEquals((tarek, likes, pizza) in g1, True)
    self.assertEquals((tarek, likes, cheese) in g1, False)
    self.assertEquals((bob, likes, cheese) in g1, False)
def content():
    # Initialize RDF graph
    payload = Graph()
    # Bind the required namespaces
    payload.bind('ssn', SSN)
    payload.bind('dc', DCTERMS)
    payload.bind('foaf', FOAF)
    doc = URIRef('')
    this = URIRef('#this')
    # Add triples
    payload.add((doc, RDF.type, FOAF.Document))
    payload.add((doc, FOAF.primaryTopic, this))
    payload.add((this, RDF.type, URIRef(r.type)))
    payload.add((this, DCTERMS.title, Literal(r.name)))
    payload.add((this, DCTERMS.description, Literal(r.description)))
    for l in r.linked_resources:
        payload.add((this, SIOC.container_of, URIRef(l)))
    # Serialize the resulting graph
    return payload.serialize(format='turtle')
def annotateConfidence(target, un, con, com):
    # thisAnnotation id is the full string, eg:
    # http://chartex.org/user/jjon/annotation/dc9d7cbdd0ebefb583e46fc2b79bc8cedde34d68
    # the last element being a hash (hashlib.sha1(oa:hastarget).hexdigest()) of this full string:
    # http://chartex.org/graphid/Person_11139might_bePerson_11339 (this triple is actually in there, why?, weird!
    target = re.sub('[<>]', '', target)
    thisAnnotationURI = "http://chartex.org/user/%s/annotation/%s" % (un, sha1(target).hexdigest())
    confidence = Literal(con) if con == 'nochange' else Literal(con, datatype=XSD.decimal)
    # TODO: if no change, create no confidenceMetric triple for the annotation OR insert original decimal value
    if (int(annotationExists('<' + thisAnnotationURI + '>')) > 0):
        return ("You've already annotated this statement: %s \nPresumably you could make a separate annotation with a different username. If you start doing that, you should keep track of all your usernames. When we have authentication and session logic, this won't be necessary.\n\nAnnotation triples:\n" % (target,),
                getSingleConfidenceAnnotation('<' + thisAnnotationURI + '>', 'application/rdf+xml'))
    else:
        thisann = URIRef(thisAnnotationURI)
        g = Graph()
        bodyNode = BNode()
        triples = [
            (thisann, RDF.type, oa.Annotation),
            (thisann, oa.hasTarget, URIRef(target)),
            (thisann, oa.hasBody, bodyNode),
            (bodyNode, chartex.suggestedConfidenceMetric, confidence),
            (bodyNode, chartex.userComment, Literal(com))
        ]
        for t in triples:
            g.add(t)
        r = requests.post(
            AGVM_VC_REPO + "/statements",
            headers={'Content-Type': 'text/turtle'},
            data=g.serialize(format='turtle'),
            auth=AG_AUTH
        )
        return (g.serialize(format='pretty-xml'), r.__dict__)
def test_load_from_model(self):
    """Can we round trip through a RDF model"""
    model = Graph()
    path = '/root/42BW9AAXX/C1-38/Project_12345/'
    filename = '12345_AAATTT_L003_R1_001.fastq.gz'
    seq = sequences.parse_fastq(path, filename)
    seq.save_to_model(model)

    seq_id = 'file://' + path + filename
    seqNode = URIRef(seq_id)
    libNode = URIRef('http://localhost/library/12345')
    model.add((seqNode, libraryOntology['library'], libNode))
    seq2 = sequences.SequenceFile.load_from_model(model, seq_id)

    self.assertEqual(seq.flowcell, seq2.flowcell)
    self.assertEqual(seq.flowcell, '42BW9AAXX')
    self.assertEqual(seq.filetype, seq2.filetype)
    self.assertEqual(seq2.filetype, 'split_fastq')
    self.assertEqual(seq.lane, seq2.lane)
    self.assertEqual(seq2.lane, '3')
    self.assertEqual(seq.read, seq2.read)
    self.assertEqual(seq2.read, 1)
    self.assertEqual(seq.project, seq2.project)
    self.assertEqual(seq2.project, '12345')
    self.assertEqual(seq.index, seq2.index)
    self.assertEqual(seq2.index, 'AAATTT')
    self.assertEqual(seq.split, seq2.split)
    self.assertEqual(seq2.split, '001')
    self.assertEqual(seq.cycle, seq2.cycle)
    self.assertEqual(seq.pf, seq2.pf)
    self.assertEqual(seq2.libraryNode, libNode)
    self.assertEqual(seq.path, seq2.path)
def rdf_get(self, departments):
    us_dept = URIRef('https://en.wikipedia.org/wiki/List_of_federal_agencies_in_the_United_States')
    g = Graph()
    for dept in departments:
        this_dept = URIRef('http://127.0.0.1:5000/departments/{0}'.format(urllib.quote(dept)))
        g.add((this_dept, RDF.type, us_dept,))
    return g.serialize(format='n3')
def test_post_bad_type_to_base(self):
    graph = Graph()
    created = BNode()
    graph.add((self.my_ktbs.uri, KTBS.contains, created))
    graph.add((created, RDF.type, RDFS.Resource))
    with assert_raises(RdfRestException):
        self.my_ktbs.post_graph(graph)
def test_post_no_type_to_base(self):
    graph = Graph()
    created = BNode()
    graph.add((self.my_ktbs.uri, RDFS.seeAlso, created))
    graph.add((created, RDF.type, KTBS.hasModel))  # in correct NS
    with assert_raises(RdfRestException):
        self.my_ktbs.post_graph(graph)
def prepareSellResponse(urlSend):
    g = Graph()
    enviaments = Graph()
    enviaments.parse(open('../data/enviaments'), format='turtle')

    urlProducts = []
    for item in enviaments.objects(subject=urlSend, predicate=ECSDI.Envia):
        for product in enviaments.objects(subject=item, predicate=ECSDI.productos):
            urlProducts.append(product)

    products = Graph()
    products.parse(open('../data/productes'), format='turtle')

    for item in urlProducts:
        marca = products.value(subject=item, predicate=ECSDI.Marca)
        modelo = products.value(subject=item, predicate=ECSDI.Modelo)
        nombre = products.value(subject=item, predicate=ECSDI.Nombre)
        precio = products.value(subject=item, predicate=ECSDI.Precio)
        g.add((item, RDF.type, ECSDI.Producto))
        g.add((item, ECSDI.Marca, Literal(marca, datatype=XSD.string)))
        g.add((item, ECSDI.Modelo, Literal(modelo, datatype=XSD.string)))
        g.add((item, ECSDI.Precio, Literal(precio, datatype=XSD.float)))
        g.add((item, ECSDI.Nombre, Literal(nombre, datatype=XSD.string)))
    return g
def test_post_no_type_to_root(self):
    graph = Graph()
    created = BNode()
    graph.add((self.my_ktbs.uri, RDFS.seeAlso, created))
    graph.add((created, RDF.type, KTBS.Base))
    with assert_raises(RdfRestException):
        self.my_ktbs.post_graph(graph)
def link_municipalities(municipalities: Graph, warsa_endpoint: str, arpa_endpoint: str):
    """
    Link to Warsa municipalities.
    """
    warsa_munics = r.helpers.read_graph_from_sparql(warsa_endpoint,
                                                    graph_name='http://ldf.fi/warsa/places/municipalities')
    log.info('Using Warsa municipalities with {n} triples'.format(n=len(warsa_munics)))

    municipalities.remove((None, SCHEMA_CAS.current_municipality, None))
    municipalities.remove((None, SCHEMA_CAS.wartime_municipality, None))

    pnr_arpa = Arpa(arpa_endpoint)
    municipalities = link_to_pnr(municipalities, SCHEMA_CAS.current_municipality, None, pnr_arpa)['graph']

    for casualty_munic in list(municipalities[:RDF.type:SCHEMA_CAS.Municipality]):
        labels = list(municipalities[casualty_munic:SKOS.prefLabel:])

        warsa_match = link_warsa_municipality(warsa_munics, labels)
        if warsa_match:
            municipalities.add((casualty_munic, SCHEMA_CAS.wartime_municipality, warsa_match))

        preferred = warsa_match or \
            municipalities.value(casualty_munic, SCHEMA_CAS.current_municipality) or \
            casualty_munic
        if preferred:
            municipalities.add((casualty_munic, SCHEMA_CAS.preferred_municipality, preferred))

    return municipalities
def test_rdflib_to_graphtool():
    try:
        from graph_tool import util as gt_util
    except ImportError:
        raise SkipTest("couldn't find graph_tool")
    from rdflib.extras.external_graph_libs import rdflib_to_graphtool
    g = Graph()
    a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    p, q = URIRef('p'), URIRef('q')
    edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    for t in edges:
        g.add(t)

    mdg = rdflib_to_graphtool(g)
    assert len(list(mdg.edges())) == 4

    vpterm = mdg.vertex_properties['term']
    va = gt_util.find_vertex(mdg, vpterm, a)[0]
    vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    vl = gt_util.find_vertex(mdg, vpterm, l)[0]
    assert (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]

    epterm = mdg.edge_properties['term']
    assert len(list(gt_util.find_edge(mdg, epterm, p))) == 3
    assert len(list(gt_util.find_edge(mdg, epterm, q))) == 1

    mdg = rdflib_to_graphtool(
        g,
        e_prop_names=[text_type('name')],
        transform_p=lambda s, p, o: {text_type('name'): text_type(p)})
    epterm = mdg.edge_properties['name']
    assert len(list(gt_util.find_edge(mdg, epterm, text_type(p)))) == 3
    assert len(list(gt_util.find_edge(mdg, epterm, text_type(q)))) == 1
def test_distribution_format_format_normalized(self):
    g = Graph()
    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))
    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal("text/csv")))
    g.add((distribution1_1, DCT["format"], Literal("Comma Separated Values")))
    g.add((dataset1, DCAT.distribution, distribution1_1))
    p = RDFParser(profiles=["euro_dcat_ap"])
    p.g = g
    datasets = [d for d in p.datasets()]
    resource = datasets[0]["resources"][0]
    if toolkit.check_ckan_version(min_version="2.3"):
        eq_(resource["format"], u"CSV")
        eq_(resource["mimetype"], u"text/csv")
    else:
        eq_(resource["format"], u"Comma Separated Values")
def directory_search_message(type):
    """
    Search the registry service by sending a request message with a Search
    action of the directory service.

    It might be more appropriate to send a query-ref and a registration
    description with variables.

    :param type:
    :return:
    """
    global mss_cnt
    logger.info('Buscamos en el servicio de registro')

    gmess = Graph()
    gmess.bind('foaf', FOAF)
    gmess.bind('dso', DSO)
    reg_obj = agn[AgentePersonal.name + '-search']
    gmess.add((reg_obj, RDF.type, DSO.Search))
    gmess.add((reg_obj, DSO.AgentType, type))

    msg = build_message(gmess, perf=ACL.request,
                        sender=AgentePersonal.uri,
                        receiver=DirectoryAgent.uri,
                        content=reg_obj,
                        msgcnt=mss_cnt)
    gr = send_message(msg, DirectoryAgent.address)
    mss_cnt += 1
    logger.info('Recibimos informacion del agente')
    return gr
def browser_retorna():
    global compres
    if request.method == 'GET':
        logger.info('Mostramos las compras realizadas')
        count, counts = get_all_sells()
        return render_template('retorna.html', compres=compres, count=count, sizes=counts)
    else:
        logger.info('Empezamos el proceso de devolucion')
        sells_checked = []
        for item in request.form.getlist("checkbox"):
            sells_checked.append(compres[int(item)][0])
        logger.info("Creando la peticion de compra")
        g = Graph()
        content = ECSDI['Peticion_retorno_' + str(get_count())]
        g.add((content, RDF.type, ECSDI.Peticion_retorno))
        for item in sells_checked:
            g.add((content, ECSDI.CompraRetornada, URIRef(item)))
        seller = get_agent_info(agn.SellerAgent, DirectoryAgent, UserPersonalAgent, get_count())
        send_message(
            build_message(g, perf=ACL.request, sender=UserPersonalAgent.uri, receiver=seller.uri,
                          msgcnt=get_count(), content=content),
            seller.address)
        return render_template('endRetorna.html')
def infoagent_search_message(addr, ragn_uri):
    """
    Sends an action to an information agent.
    """
    global mss_cnt
    logger.info('Hacemos una peticion al servicio de informacion')

    gmess = Graph()

    # Assumed ontology of information-agent actions
    IAA = Namespace('IAActions')

    gmess.bind('foaf', FOAF)
    gmess.bind('iaa', IAA)
    reg_obj = agn[AgentePersonal.name + '-info-search']
    gmess.add((reg_obj, RDF.type, IAA.Search))

    msg = build_message(gmess, perf=ACL.request,
                        sender=AgentePersonal.uri,
                        receiver=ragn_uri,
                        msgcnt=mss_cnt)
    gr = send_message(msg, addr)
    mss_cnt += 1
    logger.info('Recibimos respuesta a la peticion al servicio de informacion')
    return gr
def compareCommonTerms(terms, buff):
    global currentGraph, previousGraph, showDiffs
    currentGraph.bind("schema", "http://schema.org/")
    currentGraph.bind("dc", "http://purl.org/dc/terms/")
    currentGraph.bind("rdf", RDF)
    currentGraph.bind("rdfs", RDFS)
    changedCount = 0
    for t in sorted(terms):
        c = Graph()
        p = Graph()
        for trip in currentGraph.triples((URIRef(t), None, None)):
            c.add(trip)
        for trip in previousGraph.triples((URIRef(t), None, None)):
            p.add(trip)
        newg = c - p
        dropg = p - c
        if len(newg) > 0 or len(dropg) > 0:
            changedCount += 1
            buff.write(" Changed term: %s\n" % t)
            if showDiffs:
                for s, p, o in newg.triples((None, None, None)):
                    buff.write(" New: %s %s %s\n" % (str(s), currentGraph.qname(p), o))
                for s, p, o in dropg.triples((None, None, None)):
                    buff.write(" Dropped: %s %s %s\n" % (str(s), currentGraph.qname(p), o))
    return changedCount
def _construct(compiler, sources, query=None):
    dataset = ConjunctiveGraph()
    if not isinstance(sources, list):
        sources = [sources]
    for sourcedfn in sources:
        source = sourcedfn['source']
        graph = dataset.get_context(URIRef(sourcedfn.get('dataset') or source))
        if isinstance(source, (dict, list)):
            context_data = sourcedfn['context']
            if not isinstance(context_data, list):
                context_data = compiler.load_json(context_data)['@context']
            context_data = [compiler.load_json(ctx)['@context']
                            if isinstance(ctx, unicode) else ctx
                            for ctx in context_data]
            to_rdf(source, graph, context_data=context_data)
        elif isinstance(source, Graph):
            graph += source
        else:
            graph += compiler.cached_rdf(source)
    if not query:
        return graph
    with compiler.path(query).open() as fp:
        result = dataset.query(fp.read())
    g = Graph()
    for spo in result:
        g.add(spo)
    return g
def convert_graveyards(self, uri, graph: Graph):
    """
    Convert graveyard information into URIs. Check if the created URI exists in cemeteries ontology.
    """
    mun = graph.value(uri, SCHEMA_CAS.municipality_of_burial)
    if not mun or str(mun) == 'X':
        return graph

    gy = graph.value(uri, SCHEMA_CAS.graveyard_number)
    gy_uri = '{base}h{mun}'.format(base=CEMETERIES, mun=str(mun).split('/k')[-1])
    # mun_uri = '{base}k{mun}'.format(base=KUNNAT, mun=mun)
    if gy:
        gy_uri += '_{gy}'.format(gy=gy)
    else:
        return graph

    gy_uri = URIRef(GRAVEYARD_MAPPING.get(gy_uri, gy_uri))

    if gy_uri not in self.cemeteries:
        logging.info('Cemetery {gy} not found for person {p}'.format(gy=gy_uri, p=uri))
        return graph

    if str(gy).isnumeric():
        graph.add((uri, SCHEMA_WARSA.buried_in, gy_uri))
        graph.remove((uri, SCHEMA_CAS.graveyard_number, gy))

    return graph
def notify(uri):
    g = Graph()
    g.add((URIRef(uri), RDF.type, URIRef('http://www.bbc.co.uk/search/schema/ContentItem')))
    g.add((URIRef(uri), URIRef('http://www.bbc.co.uk/search/schema/url'), Literal(uri)))
    g.parse(uri)
    return g.serialize(format='nt').decode('utf-8')
def process_search():
    # We assume there is a Search action that can take different parameters
    # depending on whether we look for a type of agent or for a specific agent
    # by URI or name.
    # We could also solve this with a query-ref, sending a registration object
    # with variables and constants.
    # Here we only handle the case where Search indicates the agent type:
    # we look for an exact match and return the first item of the candidate list.
    logger.info('Peticion de busqueda')

    agn_type = gm.value(subject=content, predicate=DSO.AgentType)
    rsearch = dsgraph.triples((None, DSO.AgentType, agn_type))
    if rsearch is not None:
        agn_uri = rsearch.next()[0]
        agn_add = dsgraph.value(subject=agn_uri, predicate=DSO.Address)
        gr = Graph()
        gr.bind('dso', DSO)
        rsp_obj = agn['Directory-response']
        gr.add((rsp_obj, DSO.Address, agn_add))
        gr.add((rsp_obj, DSO.Uri, agn_uri))
        return build_message(gr,
                             ACL.inform,
                             sender=DirectoryAgent.uri,
                             msgcnt=mss_cnt,
                             receiver=agn_uri,
                             content=rsp_obj)
    else:
        # If nothing is found, return an inform message with no content
        return build_message(Graph(),
                             ACL.inform,
                             sender=DirectoryAgent.uri,
                             msgcnt=mss_cnt)
class ShaclSerializer(object):

    def __init__(self, target_file, shapes_list, namespaces_dict=None, string_return=False,
                 instantiation_property_str=RDF_TYPE_STR, wikidata_annotation=False):
        self._target_file = target_file
        self._namespaces_dict = namespaces_dict if namespaces_dict is not None else {}
        self._shapes_list = shapes_list
        self._string_return = string_return
        self._instantiation_property_str = instantiation_property_str
        self._wikidata_annotation = wikidata_annotation

        self._g_shapes = Graph()
        # self._uri_dict = {}

    def serialize_shapes(self):
        self._add_namespaces()
        self._add_shapes()
        return self._produce_output()

    #################### NAMESPACES

    def _add_namespaces(self):
        self._add_param_namespaces()
        self._add_shacl_namespace_if_needed()

    def _add_param_namespaces(self):
        for a_namespace, a_prefix in self._namespaces_dict.items():
            self._add_namespace(prefix=a_prefix, namespace_str=a_namespace)

    def _add_namespace(self, prefix, namespace_str):
        self._g_shapes.bind(prefix=prefix, namespace=Namespace(namespace_str))

    def _add_shacl_namespace_if_needed(self):
        if _SHACL_NAMESPACE in self._namespaces_dict:  # shacl already included
            return
        curr_prefixes = self._namespaces_dict.values()
        for a_prefix in _SHACL_PRIORITY_PREFIXES:  # trying default prefixes
            if a_prefix not in curr_prefixes:
                self._add_shacl_namespace(a_prefix)
                return
        counter = 1  # going for random prefixes, no defs. available
        candidate_pref = _SHACL_PRIORITY_PREFIXES[0] + str(counter)
        while candidate_pref in curr_prefixes:
            counter += 1
            candidate_pref = _SHACL_PRIORITY_PREFIXES[0] + str(counter)
        self._add_shacl_namespace(candidate_pref)

    def _add_shacl_namespace(self, shacl_prefix):
        self._add_namespace(prefix=shacl_prefix, namespace_str=_SHACL_NAMESPACE)
        self._namespaces_dict[_SHACL_NAMESPACE] = shacl_prefix

    #################### SHAPES

    def _add_shapes(self):
        for a_shape in self._shapes_list:
            self._add_shape(a_shape)

    def _add_shape(self, shape):
        r_shape_uri = self._generate_shape_uri(shape_name=shape.name)
        self._add_shape_uri(r_shape_uri=r_shape_uri)
        self._add_target_class(r_shape_uri=r_shape_uri, shape=shape)
        self._add_shape_constraints(shape=shape, r_shape_uri=r_shape_uri)

    def _add_target_class(self, shape, r_shape_uri):
        if shape.class_uri is not None:
            # TODO check if this is always an abs. URI, not sure
            self._add_triple(r_shape_uri, _R_SHACL_TARGET_CLASS_PROP, URIRef(shape.class_uri))

    def _add_shape_constraints(self, shape, r_shape_uri):
        for a_statement in shape.yield_statements():
            self._add_constraint(statement=a_statement, r_shape_uri=r_shape_uri)

    def _is_instantiation_property(self, str_property):
        return str_property == self._instantiation_property_str

    def _add_constraint(self, statement, r_shape_uri):
        if self._is_instantiation_property(statement.st_property):
            self._add_instantiation_constraint(statement=statement, r_shape_uri=r_shape_uri)
        else:
            self._add_regular_constraint(statement=statement, r_shape_uri=r_shape_uri)

    def _add_exactly_one_cardinality(self, r_constraint_node):
        self._add_min_occurs(r_constraint_node=r_constraint_node, min_occurs=1)
        self._add_max_occurs(r_constraint_node=r_constraint_node, max_occurs=1)

    def _add_in_instance(self, r_constraint_node, statement):
        target_node = self._generate_r_uri_for_str_uri(statement.st_type)
        list_seed_node = self._generate_bnode()
        self._add_triple(r_constraint_node, _R_SHACL_IN_PROP, list_seed_node)
        self._add_triple(list_seed_node, RDF.first, target_node)
        self._add_triple(list_seed_node, RDF.rest, RDF.nil)

    def _add_instantiation_constraint(self, statement, r_shape_uri):
        r_constraint_node = self._generate_bnode()
        self._add_bnode_property(r_shape_uri=r_shape_uri, r_constraint_node=r_constraint_node)
        self._add_path(statement=statement, r_constraint_node=r_constraint_node)
        self._add_exactly_one_cardinality(r_constraint_node=r_constraint_node)
        self._add_in_instance(statement=statement, r_constraint_node=r_constraint_node)

    def _add_regular_constraint(self, statement, r_shape_uri):
        r_constraint_node = self._generate_bnode()
        self._add_bnode_property(r_shape_uri=r_shape_uri, r_constraint_node=r_constraint_node)
        self._add_path(statement=statement, r_constraint_node=r_constraint_node)
        self._add_node_type(statement=statement, r_constraint_node=r_constraint_node)
        self._add_cardinality(statement=statement, r_constraint_node=r_constraint_node)

    def _add_path(self, statement, r_constraint_node):
        r_property_uri = self._generate_r_uri_for_str_uri(statement.st_property)
        self._add_triple(r_constraint_node, _R_SHACL_PATH_PROP, r_property_uri)

    def _generate_r_uri_for_str_uri(self, property_str):
        if property_str.startswith("<") and property_str.endswith(">"):
            return URIRef(property_str[1:-1])
        elif property_str.startswith("http://"):
            return URIRef(property_str)
        raise ValueError("Check here:", property_str)

    def _is_a_shape(self, target_type):
        return target_type.startswith(STARTING_CHAR_FOR_SHAPE_NAME)

    def _is_literal(self, target_type):
        return target_type == LANG_STRING_TYPE or target_type.startswith(XSD_NAMESPACE)

    def _is_macro(self, target_type):
        return target_type in _MACRO_MAPPING

    def _add_dataType_literal(self, r_constraint_node, target_type):
        if target_type == LANG_STRING_TYPE:
            type_node = _R_LANG_STRING
        elif target_type.endswith("integer"):
            type_node = XSD.integer
        elif target_type.endswith("float"):
            type_node = XSD.float
        elif target_type.endswith("string"):
            type_node = XSD.string
        else:
            raise ValueError("Check here:" + target_type)
        self._add_triple(r_constraint_node, _R_SHACL_DATATYPE_PROP, type_node)

    def _add_node_shape(self, r_constraint_node, target_type):
        self._add_triple(r_constraint_node, _R_SHACL_NODE_PROP,
                         self._generate_shape_uri(shape_name=target_type))

    def _add_nodeKind_macro(self, r_constraint_node, target_type):
        type_node = _MACRO_MAPPING[target_type]
        if type_node is not None:
            self._add_triple(r_constraint_node, _R_SHACL_NODEKIND_PROP, type_node)

    def _add_node_type(self, statement, r_constraint_node):
        # sh:dataType for literal types
        # sh:nodeKind for IRI or similar macros.
        # sh:node for a shape
        if self._is_literal(statement.st_type):
            self._add_dataType_literal(r_constraint_node=r_constraint_node, target_type=statement.st_type)
        elif self._is_macro(statement.st_type):
            self._add_nodeKind_macro(r_constraint_node=r_constraint_node, target_type=statement.st_type)
        elif self._is_a_shape(statement.st_type):
            self._add_node_shape(r_constraint_node=r_constraint_node, target_type=statement.st_type)
        else:
            raise ValueError("Check here: ")

    def _min_occurs_from_cardinality(self, cardinality):
        if cardinality in [KLEENE_CLOSURE, OPT_CARDINALITY]:
            return None
        elif cardinality == POSITIVE_CLOSURE:
            return 1
        else:
            return cardinality

    def _max_occurs_from_cardinality(self, cardinality):
        if cardinality in [KLEENE_CLOSURE, POSITIVE_CLOSURE]:
            return None
        elif cardinality == OPT_CARDINALITY:
            return 1
        else:
            return cardinality

    def _generate_r_literal(self, value, l_type):
        return Literal(value, datatype=self._map_rdflib_datatype(l_type))

    def _map_rdflib_datatype(self, l_type):
        if l_type == _INTEGER:
            return XSD.integer
        elif l_type == _STRING:
            return XSD.string
        else:
            raise ValueError("Check here: " + l_type)

    def _add_min_occurs(self, r_constraint_node, min_occurs):
        self._add_triple(r_constraint_node, _R_SHACL_MIN_COUNT_PROP,
                         self._generate_r_literal(value=min_occurs, l_type=_INTEGER))

    def _add_max_occurs(self, r_constraint_node, max_occurs):
        self._add_triple(r_constraint_node, _R_SHACL_MAX_COUNT_PROP,
                         self._generate_r_literal(value=max_occurs, l_type=_INTEGER))

    def _add_cardinality(self, statement, r_constraint_node):
        min_occurs = self._min_occurs_from_cardinality(statement.cardinality)
        max_occurs = self._max_occurs_from_cardinality(statement.cardinality)
        if min_occurs is not None:
            self._add_min_occurs(r_constraint_node=r_constraint_node, min_occurs=min_occurs)
        if max_occurs is not None:
            self._add_max_occurs(r_constraint_node=r_constraint_node, max_occurs=max_occurs)

    def _add_bnode_property(self, r_shape_uri, r_constraint_node):
        self._add_triple(r_shape_uri, _R_SHACL_PROPERTY_PROP, r_constraint_node)
        self._add_triple(r_constraint_node, RDF.type, _R_SHACL_PROPERTY_SHAPE_URI)

    def _generate_shape_uri(self, shape_name):
        if shape_name.startswith(_EXPECTED_SHAPE_BEGINING) and shape_name.endswith(_EXPECTED_SHAPE_ENDING):
            return URIRef(shape_name[2:-1])  # Excluding "@<" and ">"
        raise ValueError("Check here:", shape_name)

    def _add_shape_uri(self, r_shape_uri):
        self._add_triple(r_shape_uri, RDF.type, _R_SHACL_SHAPE_URI)

    def _add_triple(self, s, p, o):
        self._g_shapes.add((s, p, o))

    @staticmethod
    def _generate_bnode():
        return BNode()

    #################### OUTPUT

    def _produce_output(self):
        if self._wikidata_annotation:
            return self._produce_wikidata_annotation_output()
        # destination = None if self._string_return else self._target_file
        if self._string_return:
            return self._g_shapes.serialize(format="turtle").decode("utf-8")
        else:
            self._g_shapes.serialize(destination=self._target_file, format="turtle")

    def _produce_wikidata_annotation_output(self):
        result = self._g_shapes.serialize(format="turtle").decode("utf-8")
        result = wikidata_annotation(raw_input=result,
                                     string_return=self._string_return,
                                     out_file=self._target_file,
                                     format=TURTLE_FORMAT,
                                     rdfs_comments=False)
        if self._string_return:
            return result
def prep_annotation_file(self, basefile):
    goldstandard = self.eval_get_goldstandard(basefile)
    baseline_set = self.eval_get_ranked_set_baseline(basefile)
    baseline_map = self.eval_calc_map(self.eval_calc_aps(baseline_set, goldstandard))
    print("Baseline MAP %f" % baseline_map)
    self.log.info("Calculating ranked set (pagerank, unrestricted)")
    pagerank_set = self.eval_get_ranked_set(basefile, "pagerank",
                                            age_compensation=False,
                                            restrict_cited=False)
    pagerank_map = self.eval_calc_map(self.eval_calc_aps(pagerank_set, goldstandard))
    print("Pagerank MAP %f" % pagerank_map)
    sets = [{'label': 'Baseline', 'data': baseline_set},
            {'label': 'Gold standard', 'data': goldstandard},
            {'label': 'PageRank', 'data': pagerank_set}]

    g = Graph()
    g.bind('dcterms', self.ns['dcterms'])
    g.bind('rinfoex', self.ns['rinfoex'])

    XHT_NS = "{http://www.w3.org/1999/xhtml}"
    tree = ET.parse(self.parsed_path(basefile))
    els = tree.findall("//" + XHT_NS + "div")
    articles = []
    for el in els:
        if 'typeof' in el.attrib and el.attrib['typeof'] == "eurlex:Article":
            article = str(el.attrib['id'][1:])
            articles.append(article)

    for article in articles:
        self.log.info("Results for article %s" % article)
        articlenode = URIRef("http://lagen.nu/ext/celex/12008E%03d" % int(article))
        resultsetcollectionnode = BNode()
        g.add((resultsetcollectionnode, RDF.type, RDF.List))
        rc = Collection(g, resultsetcollectionnode)
        g.add((articlenode, DCTERMS["relation"], resultsetcollectionnode))
        for s in sets:
            resultsetnode = BNode()
            listnode = BNode()
            rc.append(resultsetnode)
            g.add((resultsetnode, RDF.type, RINFOEX["RelatedContentCollection"]))
            g.add((resultsetnode, DCTERMS["title"], Literal(s["label"])))
            g.add((resultsetnode, DCTERMS["hasPart"], listnode))
            c = Collection(g, listnode)
            g.add((listnode, RDF.type, RDF.List))
            if article in s['data']:
                print((" Set %s" % s['label']))
                for result in s['data'][article]:
                    resnode = BNode()
                    g.add((resnode, DCTERMS["references"], Literal(result[0])))
                    g.add((resnode, DCTERMS["title"], Literal(result[1])))
                    c.append(resnode)
                    print((" %s" % result[1]))
    return self.graph_to_annotation_file(g, basefile)
def analyze_baseline_queries(self, analyzed_articles, num_of_keyterms=5):
    basefile = "tfeu"

    # Helper from http://effbot.org/zone/element-lib.htm
    def flatten(elem, include_tail=0):
        text = elem.text or ""
        for e in elem:
            text += flatten(e, 1)
        if include_tail and elem.tail:
            text += elem.tail
        return text

    # step 1: Create a temporary whoosh index in order to find out
    # the most significant words for each article
    #ana = analysis.StandardAnalyzer()
    ana = analysis.StemmingAnalyzer()
    # vectorformat = formats.Frequency(ana)
    schema = fields.Schema(article=fields.ID(unique=True),
                           content=fields.TEXT(analyzer=ana, stored=True))
    st = RamStorage()
    tmpidx = st.create_index(schema)
    w = tmpidx.writer()

    XHT_NS = "{http://www.w3.org/1999/xhtml}"
    tree = ET.parse(self.parsed_path(basefile))
    els = tree.findall("//" + XHT_NS + "div")
    articles = []
    for el in els:
        if 'typeof' in el.attrib and el.attrib['typeof'] == "eurlex:Article":
            text = util.normalize_space(flatten(el))
            article = str(el.attrib['about'])
            articles.append(article)
            w.update_document(article=article, content=text)
    w.commit()
    self.log.info("Indexed %d articles" % len(articles))

    # Step 2: Open the large whoosh index containing the text of
    # all cases. Then, for each article, use the 5 most distinctive terms
    # (filtering away numbers) to create a query against that index
    tempsearch = tmpidx.searcher()
    g = Graph()
    g.bind('celex', 'http://lagen.nu/ext/celex/')
    g.bind('ir', 'http://lagen.nu/informationretrieval#')
    IR = Namespace('http://lagen.nu/informationretrieval#')
    # celex:12008E264 ir:keyterm "blahonga"@en.
    outfile = self.generic_path("keyterms", "analyzed", ".tex")
    util.ensure_dir(outfile)
    fp = open(outfile, "w")
    fp.write("""
\\begin{tabular}{r|%s}
\\hline
\\textbf{Art.} & \\multicolumn{%s}{l}{\\textbf{Terms}} \\\\
\\hline
""" % ("l" * num_of_keyterms, num_of_keyterms))
    for article in analyzed_articles:
        fp.write(str(int(article.split("E")[1])))
        r = tempsearch.search(query.Term("article", article))
        terms = r.key_terms("content", numterms=num_of_keyterms + 1)
        terms = [t[0] for t in terms if not t[0].isdigit()][:num_of_keyterms]
        for term in terms:
            fp.write(" & " + term)
            g.add((URIRef(article), IR["keyterm"], Literal(term, lang="en")))
        self.log.debug("Article %s:%r" % (article, terms))
        fp.write("\\\\\n")
    fp.write("""
\\hline
\\end{tabular}
""")
    fp.close()

    outfile = self.generic_path("keyterms", "analyzed", ".n3")
    util.ensure_dir(outfile)
    fp = open(outfile, "w")
    fp.write(g.serialize(format="n3"))
    fp.close()
           ?x owl:sameAs ?sameas }
        """
resultID = g2.query(spar2)

for itemencyc in resultencyc:
    for itemid in resultID:
        if itemencyc[0].encode('utf-8') == itemid[0].encode('utf-8'):
            g.add((URIRef(itemencyc[0].encode('utf-8')), owl.sameAs, (URIRef(itemid[2]))))
            if 'NA' in itemid[1]:
                gnd = 'NA'
                g.add((URIRef(itemencyc[0].encode('utf-8')), gndo.gndIdentifier, (Literal(gnd))))
            elif itemid[1].rsplit('/', 1)[1] == '-':
                gnd = 'NA'
                g.add((URIRef(itemencyc[0].encode('utf-8')), gndo.gndIdentifier, (Literal(gnd))))
            else:
**Task 06: Modifying RDF(s)**
"""

!pip install rdflib
github_storage = "https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2020-2021/master/Assignment3"

"""We read the RDF file the same way we have been doing it"""

from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDF, RDFS

g = Graph()
ns = Namespace("http://somewhere#")
g.namespace_manager.bind('ns', ns, override=False)
g.namespace_manager.bind('vcard', Namespace("http://www.w3.org/2001/vcard-rdf/3.0#"), override=False)
g.parse(github_storage + "/resources/example5.rdf", format="xml")

"""Create a new class named Researcher"""

g.add((ns.Researcher, RDF.type, RDFS.Class))
for s, p, o in g:
    print(s, p, o)

"""**TASK 6.1: Create a new class named "University"**

**TASK 6.2: Add "Researcher" as a subclass of "Person"**

**TASK 6.3: Create a new individual of Researcher named "Jane Smith"**

**TASK 6.4: Add to the individual JaneSmith the fullName, given and family names**

**TASK 6.5: Add UPM as the university where John Smith works**
"""
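"""The tasks above can be solved with the same g.add(...) pattern already used for Researcher. A minimal sketch follows; the local names (ns.University, ns.JaneSmith, ns.UPM, ns.worksFor) and the choice of vCard 3.0 properties are assumptions for illustration, not part of the assignment data."""

vcard = Namespace("http://www.w3.org/2001/vcard-rdf/3.0#")

# TASK 6.1: a new class "University"
g.add((ns.University, RDF.type, RDFS.Class))
# TASK 6.2: Researcher as a subclass of Person (assumes ns.Person is the Person class in example5.rdf)
g.add((ns.Researcher, RDFS.subClassOf, ns.Person))
# TASK 6.3: a new individual of Researcher (hypothetical URI ns.JaneSmith)
g.add((ns.JaneSmith, RDF.type, ns.Researcher))
# TASK 6.4: full, given and family names, using vCard properties (property choice is an assumption)
g.add((ns.JaneSmith, vcard.FN, Literal("Jane Smith")))
g.add((ns.JaneSmith, vcard.Given, Literal("Jane")))
g.add((ns.JaneSmith, vcard.Family, Literal("Smith")))
# TASK 6.5: UPM as the university where John Smith works (ns.UPM, ns.JohnSmith and ns.worksFor are hypothetical)
g.add((ns.UPM, RDF.type, ns.University))
g.add((ns.JohnSmith, ns.worksFor, ns.UPM))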
import json
import sys

# rdflib and pyld imports used below; they are not shown in the original excerpt
from rdflib import Graph, Namespace, Literal, BNode
from rdflib.namespace import RDF, RDFS, DC, XSD
from pyld import jsonld

sc = Namespace("http://iiif.io/api/presentation/2#")
oa = Namespace("http://www.w3.org/ns/oa#")
jw = Namespace("http://localhost:8000/jabberwocky/")


def StrLiteral(s):
    """String literal convenience method."""
    return Literal(s, datatype=XSD.string)


# Build RDF
g = Graph()
g.add((jw.manifest, RDF.type, sc.Manifest))
# Simple descriptive information
g.add((jw.manifest, RDFS.label, StrLiteral("Jabberwocky")))
g.add((jw.manifest, DC.description, StrLiteral("A bad edition of wonderful nonsense.")))
# Label/value pairs
author = BNode()
g.add((jw.manifest, sc.metadataLabels, author))
g.add((author, RDFS.label, StrLiteral("Author")))
g.add((author, RDF.value, StrLiteral("Lewis Carroll")))

# Get JSON-LD object in PyLD form
jld = pyld_json_from_rdflib_graph(g)  # helper assumed to be defined elsewhere in the original module

# Frame and compact...
framed = jsonld.compact(
class PelagiosGraph():
    """ Uses the PelagiosData object to make Pelagios compliant open annotations

from opencontext_py.apps.ldata.pelagios.graph import PelagiosGraph
pelagios = PelagiosGraph()
pelagios.project_uuids = ['3']
pelagios.make_graph()
pelagios.g.serialize(format='turtle')

from opencontext_py.apps.ldata.pelagios.graph import PelagiosGraph
pelagios = PelagiosGraph()
pelagios.do_web_annotations = True
pelagios.get_graph()
pelagios.g.serialize(format='turtle')
    """
    NAMESPACES = {
        'cnt': 'http://www.w3.org/2011/content#',
        'dcterms': 'http://purl.org/dc/terms/',
        'foaf': 'http://xmlns.com/foaf/0.1/',
        'oa': 'http://www.w3.org/ns/oa#',
        'pelagios': 'http://pelagios.github.io/vocab/terms#',
        'relations': 'http://pelagios.github.io/vocab/relations#',
        # 'xsd': 'http://www.w3.org/2001/XMLSchema',
        'oc-gen': 'http://opencontext.org/vocabularies/oc-general/'
    }

    def __init__(self):
        self.data_obj = PelagiosData()
        self.project_uuids = []
        self.do_web_annotations = False
        self.test_limit = None
        self.g = None
        self.prep_graph()
        self.base_uri = settings.CANONICAL_HOST + '/pelagios/data/'
        self.annoations_uri_part = '#annotations/'
        self.anno_index = 0
        self.request = False
        self.refresh_cache = False
        self.print_caching = False
        self.cache_ok = True
        self.cache_timeout = None  # None means forever

    def get_graph(self):
        """ get graph """
        s_rdf = SerizializeRDF()
        if self.do_web_annotations:
            key = 'pelagios-web'
        else:
            key = self.make_cache_key('pelagios', '-'.join(self.project_uuids))
        if self.refresh_cache:
            g = None
        else:
            g = s_rdf.get_graph_from_file(key)
        if g is None:
            # make graph based on data from the database
            self.make_graph()
            s_rdf.save_serialized_graph(key, self.g)
        else:
            # we have graph data!
            self.g = g

    def prep_graph(self):
        """ prepares a graph for Pelagios """
        self.g = Graph()
        for prefix, ns_uri in self.NAMESPACES.items():
            ns = Namespace(ns_uri)
            self.g.bind(prefix, ns)

    def make_graph(self):
        associated_uris = []
        if self.do_web_annotations:
            self.data_obj = PelagiosGazetteerAnnotations()
            self.data_obj.make_annotations()
        else:
            self.get_db_data()
        if len(self.data_obj.oa_items) > 0:
            for uuid, oa_item in self.data_obj.oa_items.items():
                if oa_item.is_valid and len(oa_item.gazetteer_uris) > 0:
                    # only make annotations if the item is valid and actually has
                    # gazetteer uris
                    self.make_add_triple(oa_item.uri, RDF.type,
                                         self.make_full_uri('pelagios', 'AnnotatedThing'))
                    self.make_add_triple(oa_item.uri, self.make_full_uri('dcterms', 'title'),
                                         None, oa_item.title)
                    """
                    # commented out, don't need it
                    self.make_add_triple(oa_item.uri, self.make_full_uri('foaf', 'homepage'),
                                         oa_item.uri)
                    """
                    if isinstance(oa_item.description, str):
                        # add description
                        self.make_add_triple(oa_item.uri, self.make_full_uri('dcterms', 'description'),
                                             None, oa_item.description)
                    if self.check_valid_uri(oa_item.depiction):
                        # in response to issue https://github.com/ekansa/open-context-py/issues/480
                        self.make_add_triple(oa_item.uri, self.make_full_uri('foaf', 'depiction'),
                                             oa_item.depiction)
                    if isinstance(oa_item.temporal, str):
                        # temporal metadata, as string of ISO 8601 '/' seperated intervals
                        self.make_add_triple(oa_item.uri, self.make_full_uri('dcterms', 'temporal'),
                                             None, oa_item.temporal)
                    # add language assertion
                    self.make_add_triple(oa_item.uri, self.make_full_uri('dcterms', 'language'),
                                         None, settings.LANGUAGE_CODE)
                    # add assertion about part of a project
                    if oa_item.uri != oa_item.project_uri:
                        self.make_add_triple(oa_item.uri, self.make_full_uri('dcterms', 'isPartOf'),
                                             oa_item.project_uri)
                    # now add gazetteer annotations to the item
                    if oa_item.contained_project_uuid is not None:
                        base_anno_uri = self.base_uri + oa_item.contained_project_uuid
                    elif oa_item.manifest is not None:
                        base_anno_uri = self.base_uri + oa_item.manifest.project_uuid
                    else:
                        base_anno_uri = self.base_uri + 'web'
                    base_anno_uri += self.annoations_uri_part
                    self.make_gazetteer_annotations(oa_item.uri, oa_item.gazetteer_uris, base_anno_uri)
                    # now add related annotations
                    if len(oa_item.associated) > 0:
                        for ass in oa_item.associated:
                            self.make_add_triple(ass['uri'], RDF.type,
                                                 self.make_full_uri('pelagios', 'AnnotatedThing'))
                            self.make_add_triple(ass['uri'], self.make_full_uri('dcterms', 'title'),
                                                 None, ass['title'])
                            """
                            # commented out, don't need it
                            self.make_add_triple(ass['uri'], self.make_full_uri('foaf', 'homepage'),
                                                 ass['uri'])
                            """
                            if self.check_valid_uri(ass['depiction']):
                                # in response to issue https://github.com/ekansa/open-context-py/issues/480
                                self.make_add_triple(ass['uri'], self.make_full_uri('foaf', 'depiction'),
                                                     ass['depiction'])
                            self.make_add_triple(ass['uri'], self.make_full_uri('dcterms', 'description'),
                                                 None, ass['description'])
                            if isinstance(ass['temporal'], str):
                                # temporal metadata, as string of ISO 8601 '/' seperated intervals
                                self.make_add_triple(ass['uri'], self.make_full_uri('dcterms', 'temporal'),
                                                     None, ass['temporal'])
                            self.make_add_triple(ass['uri'], self.make_full_uri('dcterms', 'language'),
                                                 None, settings.LANGUAGE_CODE)
                            self.make_add_triple(ass['uri'], self.make_full_uri('dcterms', 'relation'),
                                                 oa_item.uri)
                            self.make_gazetteer_annotations(ass['uri'], oa_item.gazetteer_uris,
                                                            base_anno_uri)

    def check_valid_uri(self, uri):
        """ checks to see if a uri is valid """
        valid = False
        if isinstance(uri, str):
            uri_out = False
            try:
                uri_test = URIRef(uri)
                uri_out = uri_test.n3()
            except:
                # some sort of error thrown, so not valid
                valid = False
            if isinstance(uri_out, str):
                valid = True
        return valid

    def make_gazetteer_annotations(self, target_uri, gazetteer_uris, base_anno_uri):
        """ makes annotations for a target_uri from a list of gazetteer_uris """
        for gaz_uri in gazetteer_uris:
            self.anno_index += 1
            anno_uri = base_anno_uri + str(self.anno_index)
            self.make_add_triple(anno_uri, RDF.type, self.make_full_uri('oa', 'Annotation'))
            self.make_add_triple(anno_uri, self.make_full_uri('oa', 'hasTarget'), target_uri)
            self.make_add_triple(anno_uri, self.make_full_uri('oa', 'hasBody'), gaz_uri)

    def make_add_triple(self, sub_uri, pred_uri, obj_uri=None, obj_literal=None):
        """ makes a triple and adds it to the graph """
        act_s = URIRef(sub_uri)
        act_p = URIRef(pred_uri)
        if obj_literal is not None:
            act_o = Literal(obj_literal)
        else:
            act_o = URIRef(obj_uri)
        self.g.add((act_s, act_p, act_o))

    def make_full_uri(self, prefix, value):
        """ makes a full uri for a prefix and value """
        if prefix in self.NAMESPACES:
            output = self.NAMESPACES[prefix] + value
        else:
            output = prefix + ':' + value
        return output

    def get_db_data(self):
        """ gets gazetteer related items, then populates these with manifest
            objects and context paths (for subjects)
        """
        key = self.make_cache_key('pelagios', '-'.join(self.project_uuids))
        if self.refresh_cache:
            # we're forcing a refresh of the cache, not use of cached data
            cache_data_obj = None
        else:
            # check to see if we have a cached version
            cache_data_obj = self.get_cache_object(key)
        if cache_data_obj is None:
            self.data_obj.project_uuids = self.project_uuids
            self.data_obj.test_limit = self.test_limit
            self.data_obj.get_prep_ocitems_rel_gazetteer()
            # now cache the data
            self.save_cache_object(key, self.data_obj)
        else:
            # use the cached data for the data object
            self.data_obj = cache_data_obj

    def make_cache_key(self, prefix, identifier):
        """ makes a valid OK cache key """
        return str(prefix) + "-" + str(identifier)

    def make_cache_key_hash(self, prefix, identifier):
        """ makes a valid OK cache key """
        hash_obj = hashlib.sha1()
        concat_string = str(prefix) + "-" + str(identifier)
        hash_obj.update(concat_string.encode('utf-8'))
        return hash_obj.hexdigest()

    def get_cache_object(self, key):
        """ gets a cached redis object """
        try:
            cache = caches['default']
            obj = cache.get(key)
            if self.print_caching:
                print('Cache checked: ' + key)
        except:
            obj = None
            if self.print_caching:
                print('Cache Fail checked: ' + key)
        return obj

    def save_cache_object(self, key, obj):
        """ saves a cached redis object """
        try:
            cache = caches['default']
            cache.set(key, obj, self.cache_timeout)
            ok = True
            if self.print_caching:
                print('Cache Saved: ' + key)
        except:
            self.cache_ok = False
            ok = False
            if self.print_caching:
                print('Failed to cache: ' + key)
        return ok
           ?x <http://purl.org/spar/pro/author> ?author.
           ?author skos:prefLabel ?name }
        """
results = g.query(spar)

for ubitem in results:
    print ubitem
    name = ubitem[3].encode('utf-8')  # name of the author
    for i in range(0, len(gndname)):
        if name == gndname[i][0]:
            graphout.add((URIRef(ubitem[0]), RDF.type, edm.ProvidedCHO))
            graphout.add((URIRef(ubitem[0]), dc.creator, Literal(name)))
            graphout.add((URIRef(ubitem[0]), dc.identifier, Literal(ubitem[1])))
            graphout.add((URIRef(ubitem[0]), gndo.gndIdentifier, URIRef(gndname[i][1])))
            graphout.add((URIRef(ubitem[0]), dc.description, Literal((ubitem[4]))))
            graphout.add((URIRef(ubitem[0]), dc.title, Literal((ubitem[5]))))

graphout.serialize(destination='cm-authors-context-GND-uni-02.rdf', format="turtle")
def cercaHotels():
    resultat = Graph()
    resultat.bind('via', VIA)
    contingut = Graph()
    obj_restriccions = gm.value(subject=content, predicate=DEM.Restriccions_hotels)
    ciutat = gm.value(subject=obj_restriccions, predicate=DEM.Ciutat)
    dataI = gm.value(subject=obj_restriccions, predicate=DEM.Data_inici)
    dataF = gm.value(subject=obj_restriccions, predicate=DEM.Data_final)
    NumPer = gm.value(subject=obj_restriccions, predicate=DEM.NumPersones)
    preuAllot = gm.value(subject=obj_restriccions, predicate=DEM.Preu)
    data_ini = stringToDate(dataI)
    data_fi = stringToDate(dataF)
    contingut.parse('../../Ontologies/Viatge-RDF.owl', format='xml')
    res = contingut.query(f"""
        SELECT ?nm ?c ?ta ?preu ?sit ?t ?testn ?ppn
        WHERE {{
            ?a rdf:type via:Allotjament .
            ?a via:Nom ?nm .
            ?a via:Capacitat ?c .
            ?a via:TipusAllotjament ?ta .
            ?a via:val ?p .
            ?p via:Import ?preu .
            ?a via:es_troba_a ?ciu .
            ?ciu via:Nom "{ciutat}" .
            ?a via:se_situa_a ?s .
            ?s via:Nom ?sit .
            ?a via:te_habitacions ?th .
            ?th via:Nom ?t .
            ?a via:ofereix ?test .
            ?test via:Nom ?testn .
            ?a via:es_popular ?pp .
            ?pp via:Nom ?ppn .
        }}""", initNs={"via": VIA})
    dies = data_fi - data_ini
    for row in res:
        if (int(row[1]) >= int(NumPer)):
            print(row[3])
            preuTotal = int(NumPer) * int(row[3]) * (dies.days)
            print(preuTotal)
            if (preuTotal <= preuAllot):
                # wrap the concatenated strings in URIRef so the nodes stay rdflib terms
                Allotjaments = URIRef(VIA.Allotjament + "_" + row[0])
                resultat.add((Allotjaments, RDF.type, VIA.Allotjament))
                resultat.add((Allotjaments, VIA.Nom, Literal(row[0])))
                resultat.add((Allotjaments, VIA.Capacitat, Literal(row[1])))
                resultat.add((Allotjaments, VIA.TipusAllotjament, Literal(row[2])))
                resultat.add((Allotjaments, VIA.Preu, Literal(preuTotal)))
                resultat.add((Allotjaments, URIRef(VIA.Nom + "_Situacio"), Literal(row[4])))
                resultat.add((Allotjaments, URIRef(VIA.Nom + "_TipusHabitacio"), Literal(row[5])))
                resultat.add((Allotjaments, URIRef(VIA.Nom + "_TipusEstada"), Literal(row[6])))
                resultat.add((Allotjaments, URIRef(VIA.Nom + "_Popularitat"), Literal(row[7])))
                resultat.add((Allotjaments, URIRef(VIA.Data + "_anada"), Literal(dataI)))
                resultat.add((Allotjaments, URIRef(VIA.Data + "_tornada"), Literal(dataF)))
    return resultat
def main():
    # Parse Swift book to retrieve concepts and related resources
    start = "https://docs.swift.org/swift-book/"
    nextURL = start
    urls = [nextURL]
    concepts = {}
    while nextURL:
        url = nextURL
        page = urlopen(url)
        soup = BeautifulSoup(page, 'html.parser')
        #title = soup.find('title').string
        article = soup.select_one('article.page')
        headings = article.find_all(re.compile('^h[1-6]$'))
        for heading in headings:
            heading_text = str(heading.contents[0]).lower()
            permalink = url + heading.contents[1].get('href')
            doc = nlp(heading_text)
            noun_phrases = [chunk for chunk in doc.noun_chunks]
            if len(noun_phrases) > 0:
                new_concepts = [lemmatize(lstrip_stopwords(chunk)).strip() for chunk in noun_phrases]
            else:
                # if no noun-phrases, take as verbatim (e.g. break, continue)
                new_concepts = [heading_text]
            for c in new_concepts:
                if c not in concepts:
                    concepts[c] = []
                if permalink not in concepts[c]:
                    # optionally: don't add if permalink (apart from fragment) is already contained
                    # (to avoid reindexing the same page multiple times, as a concept like "Function"
                    # might appear many times on its dedicated page in different headers)
                    if not page_included(permalink, concepts[c]):
                        concepts[c].append(permalink)
        # continue to next page (if any)
        nextLink = soup.select_one("p.next a")
        if nextLink:
            parts = urlsplit(nextURL)
            base_path, _ = split(parts.path)
            base_url = urlunsplit((parts.scheme, parts.netloc, join(base_path, ""), parts.query, parts.fragment))
            nextURL = urljoin(base_url, nextLink.get('href'))
            urls.append(nextURL)
        else:
            nextURL = None

    # RDF Graph creation
    g = Graph()

    # Namespace bindings
    NS = Namespace(ALMA_NS + SCHEME_NAME + "#")
    DBPEDIA = Namespace('http://dbpedia.org/page/')
    g.namespace_manager.bind('owl', OWL)
    g.namespace_manager.bind('skos', SKOS)
    g.namespace_manager.bind('dct', DCTERMS)
    g.namespace_manager.bind('foaf', FOAF)
    g.namespace_manager.bind('dbr', DBPEDIA)
    g.namespace_manager.bind(SCHEME_NAME, NS)

    # Ontology Metadata
    ontology = URIRef(ALMA_NS + SCHEME_NAME)
    g.add((ontology, RDF.type, OWL.term("Ontology")))
    g.add((ontology, DCTERMS.term("title"), Literal("{} Ontology".format(SCHEME_NAME.title()))))
    g.add((ontology, DCTERMS.term("description"), Literal("This is an SKOS-based lightweight ontology about the Swift programming language.")))
    g.add((ontology, DCTERMS.term("subject"), URIRef(quote("http://dbpedia.org/page/Swift_(programming_language)"))))
    g.add((ontology, DCTERMS.term("license"), URIRef("https://creativecommons.org/licenses/by-sa/4.0/")))
    g.add((ontology, DCTERMS.term("created"), Literal(DATE_CREATED)))
    g.add((ontology, DCTERMS.term("modified"), Literal(DATE_MODIFIED)))
    g.add((ontology, RDFS.term("seeAlso"), URIRef("https://coast.uni.lu/alma/")))
    g.add((ontology, OWL.term("versionIRI"), URIRef("http://purl.org/lu/uni/alma/{}/{}".format(SCHEME_NAME, LANGUAGE_VERSION))))
    g.add((ontology, OWL.term("versionInfo"), Literal("{}/{}".format(LANGUAGE_VERSION, ONTOLOGY_VERSION))))
    g.add((ontology, OWL.term("imports"), URIRef("http://www.w3.org/2004/02/skos/core")))
    creator = BNode()
    g.add((ontology, DCTERMS.term("creator"), creator))
    g.add((creator, RDF.type, FOAF.term("Person")))
    g.add((creator, FOAF.term("name"), Literal(AUTHOR_NAME)))
    g.add((creator, FOAF.term("mbox"), URIRef(AUTHOR_EMAIL)))

    # Concept Scheme
    schemeURI = NS.term("Scheme")
    g.add((schemeURI, RDF.type, SKOS.term("ConceptScheme")))
    g.add((schemeURI, DCTERMS.term("title"), Literal(SCHEME_NAME.title())))

    # Concepts
    for (concept, urls) in concepts.items():
        conceptURI = NS.term(cleanse(concept))
        prefLabel = concept.title()
        g.add((conceptURI, RDF.type, SKOS.term("Concept")))
        g.add((conceptURI, RDF.type, OWL.term("NamedIndividual")))
        g.add((conceptURI, SKOS.term("inScheme"), schemeURI))
        g.add((conceptURI, SKOS.term("prefLabel"), Literal(prefLabel, lang='en')))
        # Resources from Swift book
        for url in urls:
            g.add((conceptURI, SKOS.term("definition"), URIRef(url)))

    # Serialization
    for (format, file_extension) in SERIALIZATION_FORMATS.items():
        file_name = "{}_{}_{}.{}".format(SCHEME_NAME, LANGUAGE_VERSION, ONTOLOGY_VERSION, file_extension)
        g.serialize(format=format, destination=file_name)
        print("Saved under {}".format(file_name))
    print("# triples:", len(g))
from rappt import Chemistry
from rappt import Ecotox
from rappt import Taxonomy  # used below; assumed to live in rappt like the other classes
from rappt import strip
from rdflib import Graph
from rdflib.namespace import OWL

basepath = './'

t = Taxonomy(directory=basepath + 'taxdump/', namespace='http://example.org/ncbi')
t.save(basepath + 'rdf/')

c = Chemistry(directory=basepath + 'pubchem/')
c.save(basepath + 'rdf/')

e = Ecotox(directory=basepath + 'ecotox_ascii_03_14_2019/', namespace='http://example.org/ecotox')
e.save(basepath + 'rdf/')

### Mapping CAS to CID
chems = [strip(s, ['/', '#']) for s in e.chemicals()]
cids = c.convert_ids(from_='cas', to_='cid', ids=chems)

sameas_graph = Graph()
for a, b in zip(chems, cids):
    if a and b:
        a = e.namespace['chemical/' + str(a)]
        b = c.compound_namespace['CID' + str(b)]  # was str(s); 's' is undefined here, the CID is 'b'
        sameas_graph.add((a, OWL.sameAs, b))

sameas_graph.serialize(basepath + 'rdf/equiv.nt', format='nt')
def make_rdf(infile_pubtator, outfile_rdf):
    g = Graph()
    data = Namespace("http://www.w3.org/ns/oa#")
    ns_oa = Namespace("http://www.w3.org/ns/oa#")
    ns_dcterms = Namespace("http://purl.org/dc/terms/")
    ns_pubmed = Namespace("http://rdf.ncbi.nlm.nih.gov/pubmed/")
    ns_dbsnp = Namespace("http://identifiers.org/dbsnp/")
    ns_ncbigene = Namespace("http://identifiers.org/ncbigene/")
    ns_mesh = Namespace("http://identifiers.org/mesh/")
    ns_omim = Namespace("http://identifiers.org/omim/")
    g.bind('oa', ns_oa)
    g.bind('dcterms', ns_dcterms)
    g.bind('pubmed', ns_pubmed)
    g.bind('dbsnp', ns_dbsnp)
    g.bind('ncbigene', ns_ncbigene)
    g.bind('mesh', ns_mesh)
    g.bind('omim', ns_omim)

    fh_in = open(infile_pubtator, 'r')
    #reader = csv.reader(fh_in, delimiter="\t")
    lines = fh_in.readlines()
    for line in lines:
        row = line.rstrip('\n').split('\t')
        try:
            pmid = row[0]
            disease = row[2]
            mention = row[3]
            resource = row[4]
            list_resource = resource.split('|')
        except IndexError:
            continue

        # skip header
        if pmid == "PMID":
            continue

        blank = BNode()
        g.add((blank, RDF.type, URIRef(ns_oa.Annotation)))
        g.add((blank, URIRef(ns_oa.hasTarget), URIRef(ns_pubmed + pmid)))

        # add disease id triple
        match_mesh = re.match(r'^MESH', disease)
        match_omim = re.match(r'^OMIM', disease)
        if match_mesh:
            mesh = disease.replace('MESH:', '')
            list_mesh = mesh.split('|')
            for m in list_mesh:
                g.add((blank, URIRef(ns_oa.hasBody), URIRef(ns_mesh + m)))
        elif match_omim:
            omim = disease.replace('OMIM:', '')
            list_omim = omim.split('|')
            for o in list_omim:
                g.add((blank, URIRef(ns_oa.hasBody), URIRef(ns_omim + o)))

        # add resource triple
        for s in list_resource:
            g.add((blank, URIRef(ns_dcterms.source), Literal(s)))

    # output RDF
    g.serialize(destination=outfile_rdf, format='turtle')
    fh_in.close()
    return
def to_rdf(self, g: Graph) -> None:
    subj = self.classExpressions[0].to_rdf(g)
    for i in range(1, len(self.classExpressions)):
        obj = self.classExpressions[i].to_rdf(g)
        g.add((subj, OWL.equivalentClass, obj))
        subj = obj
import sys
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import RDF, DC

# NOTE: g, crit_base, bib_base and base are used below but not defined in this excerpt;
# they are assumed to be set up earlier in the original script (g as an rdflib Graph,
# the *_base values as URI prefixes).

ts_base = 'http://data.datascienceinstitute.ie/software/'
onto_base = 'http://datascienceinstitute.ie/asio/schema#'

filename = sys.argv[1]
lines = []
with open(filename) as fp:
    lines = fp.readlines()

for line in lines:
    if "==" in line:
        ts = line.replace("==", "").strip()
        tsuri = URIRef(ts_base + ts)
    if "::" in line:
        la = line.split("::")
        if len(la) == 4:  # ignoring homepages for now
            curi = URIRef(crit_base + la[0])
            comment = la[1]
            value = int(la[2])
            buri = URIRef(bib_base + la[3].strip())
            as_uri = URIRef(base + ts + "_" + la[0])
            g.add((as_uri, RDF.type, URIRef(onto_base + "CriterionAssessment")))
            g.add((as_uri, URIRef(onto_base + "criterion"), curi))
            g.add((as_uri, DC.subject, tsuri))
            g.add((as_uri, DC.description, Literal(comment)))
            g.add((as_uri, URIRef(onto_base + "value"), Literal(value)))
            g.add((as_uri, URIRef('http://www.w3.org/ns/prov#primarySource'), buri))

print(g.serialize(format='ttl').decode('utf-8'))
def addStuff(self):
    tarek = self.tarek
    michel = self.michel
    bob = self.bob
    likes = self.likes
    hates = self.hates
    pizza = self.pizza
    cheese = self.cheese
    c1 = self.c1
    graph = Graph(self.graph.store, c1)

    graph.add((tarek, likes, pizza))
    graph.add((tarek, likes, cheese))
    graph.add((michel, likes, pizza))
    graph.add((michel, likes, cheese))
    graph.add((bob, likes, cheese))
    graph.add((bob, hates, pizza))
    graph.add((bob, hates, michel))  # gasp!
def _generate_reg_view_rdf(self):
    g = Graph()

    REG = Namespace('http://purl.org/linked-data/registry#')
    g.bind('reg', REG)

    LDP = Namespace('http://www.w3.org/ns/ldp#')
    g.bind('ldp', LDP)

    XHV = Namespace('https://www.w3.org/1999/xhtml/vocab#')
    g.bind('xhv', XHV)

    EREG = Namespace('https://promsns.org/def/eregistry#')
    g.bind('ereg', EREG)

    register_uri = URIRef(self.uri)
    g.add((register_uri, RDF.type, REG.Register))
    g.add((register_uri, RDFS.label, Literal(self.label, datatype=XSD.string)))
    g.add((register_uri, RDFS.comment, Literal(self.comment, datatype=XSD.string)))
    for cic in self.contained_item_classes:
        g.add((register_uri, REG.containedItemClass, URIRef(cic)))
    if self.super_register is not None:
        g.add((register_uri, EREG.superregister, URIRef(self.super_register)))

    page_uri_str = self.uri + '?per_page=' + str(self.per_page) + '&page=' + str(self.page)
    page_uri_str_nonum = self.uri + '?per_page=' + str(self.per_page) + '&page='
    page_uri = URIRef(page_uri_str)

    # pagination
    # this page
    g.add((page_uri, RDF.type, LDP.Page))
    g.add((page_uri, LDP.pageOf, register_uri))

    # links to other pages
    g.add((page_uri, XHV.first, URIRef(page_uri_str_nonum + '1')))
    g.add((page_uri, XHV.last, URIRef(page_uri_str_nonum + str(self.last_page))))

    if self.page != 1:
        g.add((page_uri, XHV.prev, URIRef(page_uri_str_nonum + str(self.page - 1))))

    if self.page != self.last_page:
        g.add((page_uri, XHV.next, URIRef(page_uri_str_nonum + str(self.page + 1))))

    if len(self.contained_item_classes) == 1:
        contained_item_class = URIRef(self.contained_item_classes[0])
    else:
        contained_item_class = None

    # add all the items
    for item in self.register_items:
        if isinstance(item, tuple):  # if it's a tuple, add in the type
            if len(item) < 2:
                raise ValueError("Not enough items in register_item tuple.")
            item_uri = URIRef(item[0])
            if item[1] and isinstance(item[1], (str, bytes, Literal)):
                g.add((item_uri, RDFS.label, Literal(item[1], datatype=XSD.string)))
                if len(item) > 2 and isinstance(item[2], Identifier):
                    g.add((item_uri, RDF.type, item[2]))
                elif contained_item_class:
                    g.add((item_uri, RDF.type, contained_item_class))
            elif item[1] and isinstance(item[1], Identifier):
                g.add((item_uri, RDF.type, item[1]))
                if len(item) > 2:
                    g.add((item_uri, RDFS.label, Literal(item[2], datatype=XSD.string)))
            g.add((item_uri, REG.register, register_uri))
        else:  # just URIs
            item_uri = URIRef(item)
            if contained_item_class:
                g.add((item_uri, RDF.type, contained_item_class))
            g.add((item_uri, REG.register, register_uri))

    return g
def comunicacion():
    """
    Communication entry point
    """
    global dsgraph
    global mss_cnt

    # Extract the message and build a graph from it
    message = request.args['content']
    gm = Graph()
    gm.parse(data=message)

    msgdic = get_message_properties(gm)

    # Check whether it is a FIPA ACL message
    if not msgdic:
        # If it is not, reply not-understood
        logger.info('Msg no es FIPA ACL')
        gr = build_message(Graph(),
                           ACL['not-understood'],
                           sender=EmpresaTransportista.uri,
                           msgcnt=mss_cnt)
    else:
        # If it is, get the performative
        if msgdic['performative'] != ACL.request:
            # Not a request: not-understood
            logger.info('Msg no es una request')
            gr = build_message(Graph(),
                               ACL['not-understood'],
                               sender=EmpresaTransportista.uri,
                               msgcnt=mss_cnt)
        else:
            # Check the request type
            content = msgdic['content']
            action = gm.value(subject=content, predicate=RDF.type)
            print('La action es:', action)
            print('La action hauria de ser:', REQ.PeticioEmpresa)

            if action == REQ.PeticioEmpresa:
                logger.info('Es demana preu')
                print('------------------------Rebem peticio------------------------')

                # get the weight, destination city and maximum delivery date (HARDCODED for now)
                pes = gm.value(subject=content, predicate=REQ.QuantProductes)
                # pes = 2
                ciutatDesti = 'Barcelona'
                diaMaxim = '15/10/2021'

                # conjuntEmpreses = ['empresa_1', 'empresa_2', 'empresa_3', 'empresa_4', 'empresa_5']
                conjuntEmpreses = []
                conjuntEmpreses.append(str(gm.value(subject=content, predicate=REQ.CJE1)))
                conjuntEmpreses.append(str(gm.value(subject=content, predicate=REQ.CJE2)))
                conjuntEmpreses.append(str(gm.value(subject=content, predicate=REQ.CJE3)))
                conjuntEmpreses.append(str(gm.value(subject=content, predicate=REQ.CJE4)))
                conjuntEmpreses.append(str(gm.value(subject=content, predicate=REQ.CJE5)))

                gResposta = Graph()
                gResposta.bind('req', REQ)
                resposta_empresa = agn['resposta']

                xsddatatypes = {'s': XSD.string, 'i': XSD.int, 'f': XSD.float}
                result_properties = {'Nombre': 's', 'Precio': 'f'}

                print('HOLA1')
                for prop in result_properties:
                    if result_properties[prop] in ['s', 'i', 'f']:
                        gResposta.add((REQ[prop], RDF.type, OWL.DatatypeProperty))
                        gResposta.add((REQ[prop], RDFS.range, xsddatatypes[result_properties[prop]]))
                    else:
                        gResposta.add((REQ[prop], RDF.type, OWL.ObjectProperty))
                        gResposta.add((REQ[prop], RDFS.range, REQ[result_properties[prop]]))
                gResposta.add((REQ.RespostaEmpresa, RDF.type, OWL.Class))

                print('HOLA2')
                print('Conjunt empreses:', conjuntEmpreses)
                print('Tamany empreses:', len(conjuntEmpreses))

                for i in range(0, len(conjuntEmpreses)):
                    result_obj = REQ[conjuntEmpreses[i]]
                    print('Estic dins: ', i)
                    preu = float(pes) * random.uniform(limR[0], limR[1])
                    print('PES CALCULAT')
                    gResposta.add((result_obj, RDF.type, REQ.RespostaEmpresa))
                    print('Estic a dins del bucle:', conjuntEmpreses[i])
                    gResposta.add((result_obj, REQ['Nombre'], Literal(conjuntEmpreses[i])))
                    print('Estic a dins del bucle2:', conjuntEmpreses[i])
                    gResposta.add((result_obj, REQ['Precio'], Literal(preu)))
                    print(preu)

                # for row in conjuntEmpreses:
                #     print('Estic dins bucle:', row)
                #     preu = pes * random.uniform(limR[0], limR[1])
                #     gResposta.add((resposta_empresa, RDF.type, REQ.RespostaEmpresa))
                #     gResposta.add((resposta_empresa, REQ['Nombre'], row))
                #     gResposta.add((resposta_empresa, REQ['Precio'], preu))

                print('------------------------Preparat per retornar resposta------------------------')
                gr = build_message(gResposta,
                                   ACL['inform-done'],
                                   sender=EmpresaTransportista.uri,
                                   msgcnt=mss_cnt)

            elif action == REQ.EmpresaGuanyadora:
                print('------------------------Rebem Resposta Millor Empresa------------------------')
                empresaEscollida = gm.value(subject=content, predicate=REQ.NomEmpresa)
                print('La empresa escollida és: ', empresaEscollida)
                gr = build_message(Graph(),
                                   ACL['inform-done'],
                                   sender=EmpresaTransportista.uri,
                                   msgcnt=mss_cnt)

            else:
                logger.info('Es una request que no entenem')
                gr = build_message(Graph(),
                                   ACL['not-understood'],
                                   sender=EmpresaTransportista.uri,
                                   msgcnt=mss_cnt)

    mss_cnt += 1
    return gr.serialize(format='xml')
g.bind("prov", prov)
g.bind("ti", ti)

#############################
#                           #
#           Places          #
#                           #
#############################

for place in root.findall('.//tei:place', tei):
    place_id = place.get('{http://www.w3.org/XML/1998/namespace}id')
    place_uri = URIRef(base_uri + '/place/' + place_id)
    place_ref = '#' + place_id

    # place
    g.add((place_uri, RDF.type, crm.E53_Place))

    # place_sameas(place)
    same_as = place.get('sameAs').split()
    i = 0
    while i < len(same_as):
        same_as_uri = URIRef(same_as[i])
        g.add((place_uri, OWL.sameAs, same_as_uri))
        i += 1

    # placename(place)
    placename = place.find('./tei:placeName', tei)
    label = placename.text
    label_lang = placename.get('{http://www.w3.org/XML/1998/namespace}lang')
    if label_lang is not None:
        g.add((place_uri, RDFS.label, Literal(label, lang=label_lang)))
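The tei argument passed to findall/find above is an ElementTree prefix map defined elsewhere in the script; assuming it binds the tei: prefix to the standard TEI namespace, it would look roughly like this:

# Assumed prefix map for the tei: prefix used in the XPath expressions above.
tei = {'tei': 'http://www.tei-c.org/ns/1.0'}

With that mapping in place, root.findall('.//tei:place', tei) matches <place> elements in the TEI namespace.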
mashup_policy_res = subjects[0]
print(mashup_policy_res)

# user policy
files = glob.glob(path.join(args.policyDir, "*.ttl"))
n = 0
for userpolicy in files:
    print(userpolicy)
    g_policy = Graph()
    g_policy.parse(userpolicy, format="turtle")  # parse the current user policy file
    user_policy_res = list(g_policy.triples((None, RDF.type, OWL.Class)))[0][0]
    print(user_policy_res)

    cfg = BNode()
    g_config.add((cfg, CPSS["refPolicy"], makeList(g_config, args.refPolicy)))
    g_config.add((cfg, CPSS["userPolicy"], URIRef(userpolicy.replace(path.sep, "/"))))
    g_config.add((cfg, CPSS["usagePolicy"], URIRef(args.mashupPolicy)))
    g_config.add((cfg, CPSS["usageSubj"], mashup_policy_res))
    g_config.add((cfg, CPSS["userSubj"], user_policy_res))
    g_config.add((root, CPSS["hasConfig"], cfg))
    n += 1
    # if n > 5: break

with open(args.outputCfg, "wb") as fo:
    fo.write(g_config.serialize(format="turtle"))
def to_rdf(self) -> Graph:
    """Generate an rdflib.Graph from this GraphSet.

    Returns:
        rdf.Graph object representing this GraphSet.
    """
    namespace = Namespace(f"{self.name}:")
    node_cache = NodeCache()
    graph = Graph()
    metadata_node = BNode()
    graph.add((metadata_node, RDF.type, getattr(namespace, "metadata")))
    graph.add((metadata_node, getattr(namespace, "name"), Literal(self.name)))
    graph.add((metadata_node, getattr(namespace, "version"), Literal(self.version)))
    graph.add((metadata_node, getattr(namespace, "start_time"), Literal(self.start_time)))
    graph.add((metadata_node, getattr(namespace, "end_time"), Literal(self.end_time)))
    for error in self.errors:
        graph.add((metadata_node, getattr(namespace, "error"), Literal(error)))
    for resource in self.resources:
        resource.to_rdf(namespace=namespace, graph=graph, node_cache=node_cache)
    return graph
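The getattr(namespace, ...) calls above are simply dynamic attribute access on an rdflib Namespace. A minimal, standalone sketch of the same pattern, using a placeholder namespace URI and property names rather than the GraphSet's real vocabulary:

from rdflib import BNode, Graph, Literal, Namespace, RDF

ns = Namespace("https://example.org/graphset#")   # placeholder namespace
g = Graph()
meta = BNode()

# getattr(ns, "name") is equivalent to ns.name or ns["name"]; all three mint
# the URI https://example.org/graphset#name
g.add((meta, RDF.type, getattr(ns, "metadata")))
g.add((meta, getattr(ns, "name"), Literal("demo")))

print(g.serialize(format="turtle"))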
for i in map:
    try:
        value = values[j][i]
    except Exception as e:
        # print(e)
        continue
    if value == "end":
        continue
    if value != "":
        obj = map[i]
        p = URIRef(obj["uri"])
        values2 = value.split("|")
        for value in values2:
            value = bbb(value)
            if obj["type"].upper() == "RESOURCE":
                all.add((subject, p, URIRef(value)))
            else:
                all.add((subject, p, Literal(value)))

# print(all)
all.serialize(destination="data/kani.rdf", format='pretty-xml')
# print(all.serialize(format="turtle").decode("utf-8"))
def _generate_alt_profiles_rdf(self):
    # Alt R Data Model as per https://www.w3.org/TR/dx-prof-conneg/#altr
    g = Graph()
    ALTR = Namespace('http://www.w3.org/ns/dx/conneg/altr#')
    g.bind('altr', ALTR)
    g.bind('dct', DCTERMS)
    PROF = Namespace('http://www.w3.org/ns/prof/')
    g.bind('prof', PROF)

    instance_uri = URIRef(self.instance_uri)

    # for each Profile, list it via its URI and give annotations
    for token, p in self.profiles.items():
        profile_uri = URIRef(p.uri)
        g.add((profile_uri, RDF.type, PROF.Profile))
        g.add((profile_uri, PROF.token, Literal(token, datatype=XSD.token)))
        g.add((profile_uri, RDFS.label, Literal(p.label, datatype=XSD.string)))
        g.add((profile_uri, RDFS.comment, Literal(p.comment, datatype=XSD.string)))

    # for each Profile and Media Type, create a Representation
    for token, p in self.profiles.items():
        for mt in p.mediatypes:
            if not str(mt).startswith('_'):  # ignore Media Types like `_internal`
                rep = BNode()
                g.add((rep, RDF.type, ALTR.Representation))
                g.add((rep, DCTERMS.conformsTo, URIRef(p.uri)))
                g.add((rep, URIRef(DCTERMS + 'format'), Literal(mt)))

                # if this is the default format for the Profile, say so
                if mt == p.default_mediatype:
                    g.add((rep, ALTR.isProfilesDefault, Literal(True, datatype=XSD.boolean)))

                # link this representation to the instance
                g.add((instance_uri, ALTR.hasRepresentation, rep))

                # if this is the default Profile and the default Media Type, set it as the instance's default Representation
                if token == self.default_profile_token and mt == p.default_mediatype:
                    g.add((instance_uri, ALTR.hasDefaultRepresentation, rep))

    return g
import json
import os

from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, XSD

from linking import link

path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
data = json.load(open(path + "/DatiPerElaborazione/SubwayStation.geojson", "r"))

g = Graph()
cmo = Namespace("http://www.comune.milano.it/ontology/")
schema = Namespace("https://schema.org/")
g.bind("cmo", cmo)
g.bind("schema", schema)

for element in data:
    uri = element["URI"]
    g.add((URIRef(uri), RDF.type, schema.SubwayStation))
    g.add((URIRef(uri), RDFS.label, Literal(element["nome"])))
    g.add((URIRef(uri), cmo.lineOfPublicTransportSystemStation, Literal(element["linea"])))
    g.add((URIRef(uri), cmo.latitude, Literal(element["lat"], datatype=XSD.float)))
    g.add((URIRef(uri), cmo.longitude, Literal(element["long"], datatype=XSD.float)))
    uriToLink = link(element["nome"])
if not actors:
    print("No actor")
else:
    graph = Graph()
    graph.parse("tagraph.ttl", format="turtle")
    tgo_uri = "http://example.org/cyber/tgo#"

    actors_ = [i.lower().strip().replace(" ", "_") for i in actors]
    malwares_ = [i.lower().strip().replace(" ", "_") for i in malwares]
    campaigns_ = [i.lower().strip().replace(" ", "_") for i in campaigns]
    targets_ = [i.lower().strip().replace(" ", "_") for i in targets]

    # add all named entities to their respective classes
    for i in range(len(actors_)):
        graph.add((URIRef(tgo_uri + actors_[i]), RDF.type, URIRef(tgo_uri + "ThreatActor")))
        graph.add((URIRef(tgo_uri + actors_[i]), URIRef(tgo_uri + "hasTitle"),
                   Literal(actors[i], datatype=XSD.string)))

    for i in range(len(malwares_)):
        graph.add((URIRef(tgo_uri + malwares_[i]), RDF.type, URIRef(tgo_uri + "Malware")))
        graph.add((URIRef(tgo_uri + malwares_[i]), URIRef(tgo_uri + "hasTitle"),
                   Literal(malwares[i], datatype=XSD.string)))

    for i in range(len(campaigns_)):
        graph.add((URIRef(tgo_uri + campaigns_[i]), RDF.type, URIRef(tgo_uri + "Campaign")))
        graph.add((URIRef(tgo_uri + campaigns_[i]), URIRef(tgo_uri + "hasTitle"),
                   Literal(campaigns[i], datatype=XSD.string)))
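The same label-normalisation expression is repeated for every entity list above; a small helper capturing it (the helper name is mine, not part of the original script):

def to_local_name(label: str) -> str:
    """Normalise an entity label into the local-name form used for the tgo URIs."""
    return label.lower().strip().replace(" ", "_")

assert to_local_name("  Lazarus Group ") == "lazarus_group"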
countryDict[str(country_id)] = uri

with open(filePath_2, newline='') as f2:
    instruments = csv.reader(f2, dialect='excel', delimiter=',', quotechar='"')
    p = re.compile(r'^http://')
    for row in instruments:
        instrument_id = row[0]
        name = row[1]
        type_indicator = row[2]
        sameAs_uri = row[3]
        predicate = row[4]
        section_id = row[5]

        if type_indicator in ('ANIMAL', 'ROLE_IND'):
            instrument_uri = chroles[str(instrument_id)]
            gInstruments.add((URIRef(instrument_uri), RDF.type, schema.Role))
        elif type_indicator in ('ENS', 'ROLE_GRP'):
            instrument_uri = chensembles[str(instrument_id)]
            gInstruments.add((URIRef(instrument_uri), RDF.type, schema.PerformingGroup))
        else:
            instrument_uri = chinstruments[str(instrument_id)]
            gInstruments.add((URIRef(instrument_uri), RDF.type, mo.Instrument))

        gInstruments.add((URIRef(instrument_uri), RDFS.label, Literal(name)))

        instrumentDict[str(instrument_id)] = {}
        instrumentDict[str(instrument_id)]['label'] = name
        instrumentDict[str(instrument_id)]['typeIndicator'] = type_indicator
        instrumentDict[str(instrument_id)]['uri'] = instrument_uri
        instrumentDict[str(instrument_id)]['sameAs'] = sameAs_uri
for parents0 in tree.findall("stations/station"):
    name_station = parents0.find('.//name')
    n2 = name_station.text
    estaciones1.append(n2)

for i in estaciones1:
    for parents in tree.findall("./{ELRAD}Station"):
        Name_ = parents.find(".//{ELRAD}StationName")
        name_ = Name_.text
        if name_ == i:
            bag0 = rdflib.BNode()
            g.add((bag0, Name, Literal(name_)))
            for parents2 in parents.findall("./{ELRAD}Lines/{ELRAD}Line"):
                lineN = parents2.find(".//{ELRAD}LineName")
                if lineN is not None:
                    lineName = lineN.text
                else:
                    lineName = None
                platform = parents2.find(".//{ELRAD}Platform")
                if platform is not None:
                    platform = platform.text
                direction = parents2.find(".//{ELRAD}Direction")
                if direction is not None:
                    direction = direction.text
wiki_extract = WikiExtractor()
page_title = 'Basic_income_around_the_world'
page = wiki_extract.wikipedia.page(page_title)
all_sections = wiki_extract.get_sections(sections=page.sections)

section_titles = []
subsection_titles = []
only_place_filter = {'policy': "whitelist",
                     'types': "DBpedia:Place",
                     'coreferenceResolution': False}

# Adding Continents and Countries RDF
for section in all_sections:
    if isinstance(section, dict):
        for key in section.keys():
            place = key
            graph.add((PROJECT[place.replace(" ", "_")], RDF.type, SCHEMA.Place))
            try:
                section_annotations = spotlight.annotate('http://api.dbpedia-spotlight.org/en/annotate',
                                                         place,
                                                         confidence=0.5,
                                                         filters=only_place_filter)
                graph.add((URIRef(section_annotations[0]['URI']),
                           OWL.sameAs,
                           PROJECT["place/" + place.replace(" ", "_")]))
            except:
                print("")
            for subsection in section[key]:
                if isinstance(subsection, dict):
                    place = list(subsection.keys())[0]
                else:
                    place = subsection
                graph.add((PROJECT["place/" + place.replace(" ", "_")], RDF.type, SCHEMA.Place))
                graph.add(
gn = Namespace('https://www.geonames.org/ontology#')

graph.bind('rdfs', rdfs)
graph.bind('rdf', rdf)
graph.bind('schema', schema)
graph.bind('eac-cpf', eaccpf)
graph.bind('dbo', dbo)
graph.bind('djo', djo)
graph.bind('owl', owl)
graph.bind('fabio', fabio)
graph.bind('bf', bf)
graph.bind('gnd', gnd)
graph.bind('gn', gn)

graph.add((djo['Place'], RDF['type'], owl['Class']))
graph.add((djo['Place'], rdfs['label'], Literal('Place')))
graph.add((djo['Place'], rdfs['subClassOf'], owl['Thing']))
graph.add((djo['Place'], owl['equivalentClass'], schema['Place']))
graph.add((djo['Place'], owl['equivalentClass'], gn['Feature']))

graph.add((djo['Book'], RDF['type'], owl['Class']))
graph.add((djo['Book'], rdfs['label'], Literal('Book')))
graph.add((djo['Book'], rdfs['subClassOf'], owl['Thing']))
graph.add((djo['Book'], owl['equivalentClass'], schema['Book']))
graph.add((djo['Book'], owl['equivalentClass'], fabio['Book']))
graph.add((djo['Book'], owl['equivalentClass'], bf['Work']))
graph.add((djo['Book'], owl['equivalentClass'], gnd['Work']))

graph.add((djo['Person'], RDF['type'], owl['Class']))
graph.add((djo['Person'], rdfs['label'], Literal('Person')))
# print(ensembl_geneQuery)
# sparql.setQuery(ensembl_geneQuery)
# sparql.setReturnFormat(JSON)
# results = sparql.query().convert()
# for result in results["results"]["bindings"]:
#     print(result["item"]["value"], result["ensemblGeneID"]["value"])
#     ensemblURI[result["ensemblGeneID"]["value"]] = result["item"]["value"]

i = 0  # TODO: change to uuid
for record in vcf_reader:
    i += 1  # TODO: change to uuid
    chrom_nr = chrom[record.CHROM]
    hgvs = chrom_nr + ":g." + str(record.POS) + str(record.REF) + ">" + str(record.ALT[0])
    print("hgvs: " + hgvs)

    variant_uri = URIRef("http://umc.nl/genetics/FAIR/" + urllib.parse.quote_plus(hgvs))
    vcfGraph.add((variant_uri, RDF.type, URIRef("http://purl.obolibrary.org/obo/SO_0001060")))
    vcfGraph.add((variant_uri, DCTERMS.identifier, Literal(hgvs)))
    vcfGraph.add((variant_uri, URIRef("http://www.wikidata.org/prop/direct/P3331"), Literal(hgvs)))

    chromosomeIRI = URIRef("http://umc.nl/genetics/FAIR/chromosome/" + chrom_nr)
    vcfGraph.add((chromosomeIRI, RDF.type, URIRef("https://www.wikidata.org/wiki/Q37748")))
    vcfGraph.add((chromosomeIRI, DCTERMS.identifier, Literal(chrom_nr)))
    vcfGraph.add((variant_uri, DCTERMS.isPartOf, chromosomeIRI))

    # Genomic START
    vcfGraph.add((variant_uri, wikidataprop.P644, Literal(record.POS)))
    vcfGraph.add((variant_uri, wikidataprop.P645, Literal(record.POS)))
    # print(record.)

    vcfInfo = record.INFO['ANN'][0].split("|")
    gene_uri = URIRef("http://rdf.ebi.ac.uk/resource/ensembl/" + vcfInfo[4])
    # print(record.INFO['ANN'][0])
end = datetime(int(tokens[3]), int(month_to_date[tokens[1]]), int(tokens[2]))
url_end = tokens[2] + "-" + tokens[1] + "-" + tokens[3]

manager_job_name = manager_name.replace(' ', '_') + \
    ":" + club.replace(' ', '_') + ":" + url_begin + ":" + url_end
manager_job_URI = URIRef(ontology_root + manager_job_name)

manager_name_for_URI = manager_name.replace(' ', '_') + "_manager"
manager_URI = URIRef(ontology_root + manager_name_for_URI)

club_URI = URIRef(ontology_root + club.replace(' ', '_'))
country_URI = URIRef(ontology_root + country.replace(' ', '_'))

if len(manager_nationality) > 4:
    nationality_URI = URIRef(ontology_root + manager_nationality.replace(' ', '_'))
    g.add((nationality_URI, RDF.type, dbpedia.Country))
    g.add((nationality_URI, dbpedia.informationName, Literal(manager_nationality)))
    g.add((manager_URI, sport_ontology.hasNationality, nationality_URI))

g.add((manager_job_URI, RDF.type, sport_ontology.Transfer))
g.add((club_URI, RDF.type, sport_ontology.MultiPlayer))
g.add((manager_URI, RDF.type, sport_ontology.Manager))
g.add((country_URI, RDF.type, dbpedia.Country))

g.add((manager_job_URI, sport_ontology.hasPerson, manager_URI))
g.add((manager_job_URI, sport_ontology.hasTeam, club_URI))
g.add((manager_job_URI, sport_ontology.hasDetails, Literal(position)))