def get_configurations(cls, item_type: str, config_id: str, url: str):
    if not cls.__instance:
        cls()

    cls.__init_graph()

    validated_item = validate_config_id(cls.__source_base_uri, item_type, config_id)
    if validated_item:
        item_type = unquote(item_type)
        item_type = re.sub(' ', '.', item_type)

        url = unquote(url)
        url = re.sub(' ', '.', url)

        container = Resource(cls.__graph, URIRef(url))

        streams = load_streams(cls.__source_base_uri, item_type, config_id)
        for stream in streams.keys():
            member = Resource(
                cls.__graph,
                URIRef(url.replace('configurations', 'stream') + f'/{stream}'))
            container.add(RDFS.member, member)

        return container
    else:
        return False
def output_rdf(data, code, headers=None):
    """Makes a Flask response with an RDF-encoded body in the requested representation."""
    content_type = request.headers.get('accept')
    representation = get_content_type(content_type)

    if code == OK:
        data = data.to_rdf(representation)
    else:
        g = Graph()
        g.bind('oslc', OSLC)

        rsrc = Resource(g, URIRef(request.base_url))
        rsrc.add(RDF.type, OSLC.Error)

        if isinstance(data, dict):
            for attr in data:
                rsrc.add(OSLC.term(attr), Literal(data[attr]))

        data = g.serialize(format=representation)

    resp = make_response(data, code)
    resp.headers.extend(headers or {})
    return resp
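# A minimal, self-contained sketch of the content negotiation that get_content_type (not shown
# above) presumably performs: mapping an HTTP Accept header onto an rdflib serialization format.
# The function name, mapping table and the 'turtle' fallback are assumptions for illustration only.
def guess_rdf_format(accept_header):
    formats = {
        'text/turtle': 'turtle',
        'application/rdf+xml': 'xml',
        'application/ld+json': 'json-ld',
        'application/json': 'json-ld',
        'text/n3': 'n3',
    }
    for media_type in (accept_header or '').split(','):
        media_type = media_type.split(';')[0].strip().lower()
        if media_type in formats:
            return formats[media_type]
    return 'turtle'  # assumed default when nothing acceptable is requested

# e.g. guess_rdf_format('application/rdf+xml;q=0.9, */*;q=0.1') -> 'xml'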
def __get_query_capability(cls, item_type_name: str, item_type_name_url: str, uri: str) -> Resource:
    qc = Resource(cls.__graph, BNode())
    qc.add(RDF.type, OSLC.QueryCapability)
    qc.add(DCTERMS.title, Literal(f'Query Capability for ItemType: {item_type_name}'))
    qc.add(OSLC.queryBase, URIRef(uri))
    qc.add(OSLC.resourceType, URIRef(ARAS.term(item_type_name_url)))
    qc.add(OSLC.resourceShape, URIRef(uri + '/resourceShape'))

    return qc
def convert_calendar_dates(self, csv_filename):
    read_dates = self.__open_file(csv_filename)
    for row in read_dates:
        service = self.get_service(str.strip(row["service_id"]))
        calendar_date = Resource(
            self.graph,
            URIRef(self.uri + str.strip(row["service_id"]) + "_cal" + "_" + str.strip(row["date"])))
        service.add(self.GTFS.serviceRule, calendar_date)
        calendar_date.set(RDF.type, self.GTFS.CalendarDateRule)
        calendar_date.add(DCTERMS.date, self.get_date_literal(str.strip(row["date"])))
        exception_type = str.strip(row["exception_type"])
        # compare by value; an identity check ("is") against a string literal is unreliable
        if exception_type == "2":
            exception_type = "0"
        calendar_date.add(self.GTFS.dateAddition, Literal(exception_type, datatype=XSD.boolean))
def convert_frequencies(self, csv_filename):
    read_freqs = self.__open_file(csv_filename)
    for row in read_freqs:
        freq = Resource(
            self.graph,
            URIRef(self.uri + str.strip(row["trip_id"]) + str.strip(row["start_time"]) + str.strip(row["end_time"])))
        freq.set(RDF.type, self.GTFS.Frequency)
        freq.add(self.GTFS.trip, self.get_trip(str.strip(row["trip_id"])))
        freq.add(self.GTFS.startTime, Literal(str.strip(row["start_time"]), datatype=XSD.string))
        freq.add(self.GTFS.endTime, Literal(str.strip(row["end_time"]), datatype=XSD.string))
        freq.add(self.GTFS.headwaySeconds, Literal(str.strip(row["headway_secs"]), datatype=XSD.nonNegativeInteger))
        if "exact_times" in row:
            exact = str.strip(row["exact_times"]) == "1"
            freq.add(self.GTFS.exactTimes, Literal(exact, datatype=XSD.boolean))
def get_resource_shape(cls, item_type: str, url: str, url_sp: str):
    if not cls.__instance:
        cls()

    cls.__init_graph()

    resource_shape = Resource(cls.__graph, URIRef(url))
    resource_shape.add(RDF.type, OSLC.ResourceShape)

    rs = cls.__get_resource_shape(item_type, url_sp, cls.__source_base_uri)
    if rs:
        for subject in rs.subjects(RDF.type, OSLC.Property):
            prop = Resource(cls.__graph, subject)
            prop.add(RDF.type, OSLC.Property)
            for p, o in rs.predicate_objects(subject):
                prop.add(p, o)
            resource_shape.add(OSLC.property, prop)

        return resource_shape
    else:
        return False
def get_service_provider(cls, url: str):
    if not cls.__instance:
        cls()

    cls.__init_graph()

    service_provider = None
    service = cls.__get_service(url=url)
    if service:
        service_provider = Resource(cls.__graph, URIRef(url))
        service_provider.add(RDF.type, OSLC.ServiceProvider)
        service_provider.add(OSLC.service, service)
        return service_provider
    else:
        return False
def convert_calendar(self, csv_filename):
    read_calendar = self.__open_file(csv_filename)
    for row in read_calendar:
        service = self.get_service(str.strip(row["service_id"]))
        calendar = Resource(self.graph, URIRef(self.uri + str.strip(row["service_id"]) + "_cal"))
        service.add(self.GTFS.serviceRule, calendar)
        calendar.set(RDF.type, self.GTFS.CalendarRule)
        calendar.set(self.GTFS.monday, Literal(str.strip(row["monday"]), datatype=XSD.boolean))
        calendar.set(self.GTFS.tuesday, Literal(str.strip(row["tuesday"]), datatype=XSD.boolean))
        calendar.set(self.GTFS.wednesday, Literal(str.strip(row["wednesday"]), datatype=XSD.boolean))
        calendar.set(self.GTFS.thursday, Literal(str.strip(row["thursday"]), datatype=XSD.boolean))
        calendar.set(self.GTFS.friday, Literal(str.strip(row["friday"]), datatype=XSD.boolean))
        calendar.set(self.GTFS.saturday, Literal(str.strip(row["saturday"]), datatype=XSD.boolean))
        calendar.set(self.GTFS.sunday, Literal(str.strip(row["sunday"]), datatype=XSD.boolean))
        temporal = Resource(self.graph, URIRef(self.uri + str.strip(row["service_id"]) + "_cal" + "_temporal"))
        calendar.set(DCTERMS.temporal, temporal)
        temporal.add(self.SCHEMA.startDate, self.get_date_literal(str.strip(row["start_date"])))
        temporal.add(self.SCHEMA.endDate, self.get_date_literal(str.strip(row["end_date"])))
def get_components(cls, item_type: str, url: str, paging: bool = False,
                   page_size: int = 0, page_no: int = 0):
    if not cls.__instance:
        cls()

    cls.__init_graph()

    if not cls.__item_types:
        cls.__item_types = cls.__get_item_types()

    if re.sub('\\.', ' ', item_type) in cls.__item_types.values():
        item_type = unquote(item_type)
        item_type = re.sub(' ', '.', item_type)

        url = unquote(url)
        url = re.sub(' ', '.', url)

        container = Resource(cls.__graph, URIRef(url))
        container.add(RDF.type, LDP.BasicContainer)

        config_ids = load_items(cls.__source_base_uri, item_type, page_size, page_no)

        ri, config_ids = cls.__get_paging(item_type, config_ids, url, paging, page_size, page_no)
        if ri:
            container.add(OSLC.responseInfo, ri)

        for config_id in config_ids:
            member_url = url + f'/{config_id}'
            member = Resource(cls.__graph, URIRef(member_url))
            member.add(RDF.type, OSLC_CONFIG.Component)
            member.add(DCTERMS.title, Literal(config_ids[config_id]['keyed_name']))
            container.add(LDP.contains, member)

        return container
    else:
        return False
def __get_response_info(cls, item_type: str, url: str, url_sp: str, paging: bool,
                        page_size: int, page_no: int) -> Resource:
    items = load_items(cls.__source_base_uri, item_type, page_size, page_no)

    resource = Resource(cls.__graph, URIRef(url))

    ri, items = cls.__get_paging(item_type, items, url, paging, page_size, page_no)
    if ri:
        resource.add(OSLC.responseInfo, ri)

    for item in items:
        item_url = url + '/' + re.sub(' ', '.', item)
        member = Resource(cls.__graph, URIRef(item_url))
        resource.add(RDFS.member, member)

    return resource
def convert_transfers(self, csv_filename):
    read_transfers = self.__open_file(csv_filename)
    for row in read_transfers:
        from_stop = str.strip(row["from_stop_id"])
        to_stop = str.strip(row["to_stop_id"])
        transfers = Resource(self.graph, URIRef(self.uri + "_" + from_stop + "_" + to_stop))
        transfers.set(RDF.type, self.GTFS.TransferRule)
        transfers.add(self.GTFS.originStop, self.get_stop(from_stop))
        transfers.add(self.GTFS.destinationStop, self.get_stop(to_stop))
        transfers.add(self.GTFS.transferType, self.get_transfer_type(str.strip(row["transfer_type"])))
        if "min_transfer_time" in row and str.strip(row["min_transfer_time"]):
            transfers.add(self.GTFS.minimumTransferTime,
                          Literal(str.strip(row["min_transfer_time"]), datatype=XSD.nonNegativeInteger))
def to_rdf(self, graph): super(Preview, self).to_rdf(graph) p = Resource(graph, BNode()) p.add(RDF.type, OSLC.Preview) if self.document: p.add(OSLC.document, URIRef(self.document)) if self.hint_height: p.add(OSLC.hintHeight, Literal(self.hint_height, datatype=XSD.string)) if self.hint_width: p.add(OSLC.hintWidth, Literal(self.hint_width, datatype=XSD.string)) if self.initial_height: p.add(OSLC.initialHeight, Literal(self.initial_height, datatype=XSD.string)) return p
def __get_paging(cls, item_type: str, items: dict, url: str, paging: bool,
                 page_size: int, page_no: int) -> tuple:
    ri = None
    paging = paging if paging else page_size > 0

    if paging:
        page_size = page_size if page_size else 50
        page_no = page_no if page_no else 1

        params = {'oslc.paging': 'true'}
        if page_size:
            params['oslc.pageSize'] = page_size
        if page_no:
            params['oslc.pageNo'] = page_no

        ri_url = cls.__get_url(url, params)

        ri = Resource(cls.__graph, URIRef(ri_url))
        ri.add(RDF.type, OSLC.ResponseInfo)
        ri.add(DCTERMS.title, Literal(f'Query Results for {item_type}'))

        params['oslc.pageNo'] = page_no + 1
        ri_url = cls.__get_url(url, params)
        ri.add(OSLC.nextPage, URIRef(ri_url))

    return ri, items
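# A minimal sketch of what cls.__get_url (not shown here) presumably does: append the OSLC
# paging parameters to the query base as a query string. The helper name and use of
# urllib.parse are assumptions for illustration only.
from urllib.parse import urlencode

def build_paged_url(url, params):
    # e.g. build_paged_url('http://host/services/Part',
    #                      {'oslc.paging': 'true', 'oslc.pageSize': 50, 'oslc.pageNo': 2})
    # -> 'http://host/services/Part?oslc.paging=true&oslc.pageSize=50&oslc.pageNo=2'
    return url + '?' + urlencode(params)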
def get_stream(cls, item_type: str, config_id: str, stream_id: str, url: str):
    if not cls.__instance:
        cls()

    cls.__init_graph()

    validated_item = validate_config_id(cls.__source_base_uri, item_type, config_id)
    if validated_item:
        item_type = unquote(item_type)
        item_type = re.sub(' ', '.', item_type)

        url = unquote(url)
        url = re.sub(' ', '.', url)

        streams = load_streams(cls.__source_base_uri, item_type, config_id)
        if stream_id in streams.keys():
            stream = streams[stream_id]

            configuration = Resource(cls.__graph, URIRef(url))
            configuration.add(RDF.type, OSLC_CONFIG.Stream)
            configuration.add(DCTERMS.identifier, Literal(stream['id']))
            configuration.add(DCTERMS.title, Literal(stream['keyed_name']))

            return configuration
        else:
            return False
    else:
        return False
def get_component(cls, item_type: str, config_id: str, url: str):
    if not cls.__instance:
        cls()

    cls.__init_graph()

    validated_item = validate_config_id(cls.__source_base_uri, item_type, config_id)
    if validated_item:
        item_type = unquote(item_type)
        item_type = re.sub(' ', '.', item_type)

        url = unquote(url)
        url = re.sub(' ', '.', url)

        component = Resource(cls.__graph, URIRef(url))
        component.add(RDF.type, OSLC_CONFIG.Component)

        keyed_name = None
        for item in validated_item['value']:
            keyed_name = item['keyed_name']

        if keyed_name:
            component.add(DCTERMS.title, Literal(keyed_name))

        component.add(OSLC_CONFIG.configurations, URIRef(url + '/configurations'))

        return component
    else:
        return False
def to_rdf(self):
    g = Graph()
    p = Resource(g, self.uri)
    p.add(RDF.type, FOAF.Person)
    p.set(RDFS.label, Literal(self._label()))
    p.set(CONVERIS.converisId, Literal(self.cid))
    if hasattr(self, 'cfresint'):
        p.set(VIVO.researchOverview, Literal(self.cfresint))
    if hasattr(self, 'orcid'):
        p.set(VIVO.orcidId, self.orcid_uri)
        # Confirm the orcid
        g.add((self.orcid_uri, RDF.type, OWL.Thing))
        # Todo - review if we want to confirm all orcids
        g.add((self.orcid_uri, VIVO.confirmedOrcidId, self.uri))

    # Vcard individual
    vci_uri = URIRef(self.vcard_uri)
    p.set(OBO['ARG_2000028'], vci_uri)
    g.add((vci_uri, RDF.type, VCARD.Individual))

    # Vcard Name
    g += self._vcard_name()
    g.add((vci_uri, VCARD.hasName, URIRef(self.vcard_name_uri)))

    # Vcard title
    vtg = self._vcard_title()
    if vtg is not None:
        g += vtg
        g.add((vci_uri, VCARD.hasTitle, URIRef(self.vcard_title_uri)))

    # Vcard email
    vte = self._vcard_email()
    if vte is not None:
        g += vte
        g.add((vci_uri, VCARD.hasEmail, URIRef(self.vcard_email_uri)))

    # positions
    g += self.get_positions()

    return g
def convert_fare_rules(self, csv_filename):
    read_fares = self.__open_file(csv_filename)
    for row in read_fares:
        fare = self.get_fare(str.strip(row["fare_id"]))
        fare_rule = Resource(
            self.graph,
            URIRef(self.uri + str.strip(row["fare_id"]) + "_rule_" + str(self.next_fare_rule_num)))
        self.next_fare_rule_num += 1
        fare_rule.set(RDF.type, self.GTFS.FareRule)
        # attach the rule to the fare class it belongs to
        fare_rule.set(self.GTFS.fareClass, fare)
        if "route_id" in row and str.strip(row["route_id"]) != "":
            fare_rule.add(self.GTFS.route, self.get_route(str.strip(row["route_id"])))
        if "origin_id" in row and str.strip(row["origin_id"]) != "":
            fare_rule.add(self.GTFS.originZone, self.get_zone(str.strip(row["origin_id"])))
        if "destination_id" in row and str.strip(row["destination_id"]) != "":
            fare_rule.add(self.GTFS.destinationZone, self.get_zone(str.strip(row["destination_id"])))
        if "contains_id" in row and str.strip(row["contains_id"]) != "":
            fare_rule.add(self.GTFS.zone, self.get_zone(str.strip(row["contains_id"])))
def authorships(self):
    g = Graph()
    aus = self.authors()
    for au in aus:
        aship_uri = self.aship_uri(au['rank'])
        r = Resource(g, aship_uri)
        r.set(RDFS.label, Literal(au["display_name"]))
        r.set(RDF.type, VIVO.Authorship)
        r.set(VIVO.rank, Literal(au['rank']))
        data_props = [
            ('rank', VIVO.rank),
            ('full_name', WOS.fullName),
            ('display_name', WOS.displayName),
            ('wos_standard', WOS.standardName),
            ('first', WOS.firstName),
            ('last', WOS.lastName),
            ('email', WOS.email),
            ('dais_ng', WOS.daisNg),
            ('reprint', WOS.reprint),
        ]
        for key, prop in data_props:
            value = au.get(key)
            if value is not None:
                r.set(prop, Literal(value))
        # relations
        r.add(VIVO.relates, self.uri)
        # relate to addresses too
        # address nums are a space separated list of numbers
        addr_nums = au["address"]
        if addr_nums is None:
            continue
        else:
            for anum in addr_nums.split():
                addr_uris = self.addr_uris_from_number(anum)
                for auri in addr_uris:
                    r.add(VIVO.relates, auri)
    return g
def addressships(self):
    g = Graph()
    addresses = self.addresses()
    for addr in addresses:
        addr_uri = self.addr_uri(addr["full_address"], addr["number"])
        org = addr["organization"]
        r = Resource(g, addr_uri)
        r.set(RDF.type, WOS.Address)
        r.set(RDFS.label, Literal(addr['full_address']))
        r.set(WOS.organizationName, Literal(org))
        r.set(WOS.sequenceNumber, Literal(addr['number']))
        # relation to author set by authorship
        # relate to pub
        r.set(VIVO.relates, self.uri)
        # sub orgs
        for idx, suborg in enumerate(addr["sub_organizations"]):
            label = "{}, {}".format(suborg, org)
            so_uri = self.sub_org_uri(label)
            r.add(VIVO.relates, so_uri)
        # relate unified orgs
        for uorg in addr["unified_orgs"]:
            uo_uri = waan_uri(uorg)
            r.add(VIVO.relates, uo_uri)
    return g
def to(self):
    """
    Core publication metadata mapped to VIVO RDF.
    :return: Graph
    """
    g = Graph()
    r = Resource(g, self.uri)
    r.set(RDFS.label, Literal(self.title()))
    for vtype in self.rec_type():
        r.add(RDF.type, vtype)
    r.set(WOS.wosId, Literal(self.ut))
    meta = self.meta()
    # data properties
    data_props = [
        #('author_list', WOS.authorList),
        ('abstract', BIBO.abstract),
        ('funding_acknowledgement', WOS.fundingText),
        ('volume', BIBO.volume),
        ('issue', BIBO.issue),
        ('start', BIBO.pageStart),
        ('end', BIBO.pageEnd),
        ('page_count', BIBO.numPages),
        ('doi', BIBO.doi),
        #('cite_key', WOS.citeKey),
        ('reference_count', WOS.referenceCount),
        ('citation_count', WOS.citationCount)
    ]
    for key, prop in data_props:
        value = meta.get(key)
        if value is not None:
            g.add((self.uri, prop, Literal(value)))
    g += self.add_pub_date()
    return g
def to_rdf(self, graph):
    if not self.about:
        raise Exception("The about attribute is missing")

    oac = Resource(graph, URIRef(self.about))
    oac.add(RDF.type, URIRef(OSLC.oauthConfiguration))

    if self.authorization_uri:
        oac.add(OSLC.authorizationURI, URIRef(self.authorization_uri))

    if self.oauth_access_token_uri:
        oac.add(OSLC.oauthAccessTokenURI, URIRef(self.oauth_access_token_uri))

    if self.oauth_request_token_uri:
        oac.add(OSLC.oauthRequestTokenURI, URIRef(self.oauth_request_token_uri))

    return oac
def to_rdf(self, graph):
    super(PrefixDefinition, self).to_rdf(graph)

    pd = Resource(graph, BNode())
    pd.add(RDF.type, OSLC.PrefixDefinition)

    if self.prefix:
        pd.add(OSLC.prefix, Literal(self.prefix))

    if self.prefix_base:
        pd.add(OSLC.prefixBase, URIRef(self.prefix_base.uri))

    return pd
def to_rdf(self, graph):
    super(ResponseInfo, self).to_rdf(graph)

    uri = self.about
    ri = Resource(graph, URIRef(uri))
    ri.add(RDF.type, OSLC.ResponseInfo)

    if self.title:
        ri.add(DCTERMS.title, Literal(self.title, datatype=XSD.string))

    if self.members:
        for item in self.members:
            item_url = uri + '/' + item.identifier
            member = Resource(graph, URIRef(item_url))
            ri.add(RDFS.member, member)

    if self.total_count and self.total_count > 0:
        ri.add(OSLC.totalCount, Literal(self.total_count))

    return ri
def add_pub_date(self):
    """
    Publication dates in VIVO's expected format.
    """
    g = Graph()
    value = self.pub_date()
    if value is None:
        return g
    date_uri = self.make_date_uri(self.ut, value)
    date = Resource(g, date_uri)
    date.set(RDF.type, VIVO.DateTimeValue)
    date.set(VIVO.dateTimePrecision, VIVO.yearMonthDayPrecision)
    date.add(VIVO.dateTime, Literal("%sT00:00:00" % (value), datatype=XSD.dateTime))
    date.add(RDFS.label, Literal(value))
    g.add((self.uri, VIVO.dateTimeValue, date_uri))
    return g
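# A stand-alone sketch of the VIVO date pattern built above, with a made-up date URI scheme in
# place of make_date_uri and a fixed value instead of self.pub_date(); handy for inspecting the
# Turtle this method emits. All URIs here are illustrative assumptions.
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, XSD
from rdflib.resource import Resource

VIVO = Namespace("http://vivoweb.org/ontology/core#")

g = Graph()
value = "2016-05-01"  # sample publication date
date_uri = URIRef("http://example.org/date/" + value)  # hypothetical URI scheme
date = Resource(g, date_uri)
date.set(RDF.type, VIVO.DateTimeValue)
date.set(VIVO.dateTimePrecision, VIVO.yearMonthDayPrecision)
date.add(VIVO.dateTime, Literal("%sT00:00:00" % value, datatype=XSD.dateTime))
date.add(RDFS.label, Literal(value))
print(g.serialize(format="turtle"))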
def __get_service(cls, url: str) -> Resource:
    service = None

    cls.__item_types = cls.__get_item_types()
    if cls.__item_types:
        service = Resource(cls.__graph, BNode())
        service.add(RDF.type, OSLC.Service)
        service.add(OSLC.domain, URIRef(ARAS))

        qc_url = url + '/{itemType}'

        for item_type in cls.__item_types:
            item_type_name = cls.__item_types.get(item_type)
            item_type_name_url = re.sub(' ', '.', item_type_name)
            uri = urlparse(qc_url.format(**{'itemType': item_type_name_url}))
            qc = cls.__get_query_capability(item_type_name, item_type_name_url, uri.geturl())
            service.add(OSLC.queryCapability, qc)

    return service
def to_rdf(self, graph):
    super(ServiceProviderCatalog, self).to_rdf(graph)

    spc = Resource(graph, URIRef(self.about))
    spc.add(RDF.type, OSLC.ServiceProviderCatalog)

    if self.title:
        spc.add(DCTERMS.title, Literal(self.title))

    if self.description:
        spc.add(DCTERMS.description, Literal(self.description))

    if self.publisher:
        spc.add(DCTERMS.publisher, URIRef(self.publisher.about))

    if self.domain:
        for item in self.domain:
            spc.add(OSLC.domain, URIRef(item))

    if self.service_provider:
        for sp in self.service_provider:
            r = sp.to_rdf(graph)
            spc.add(OSLC.serviceProvider, r)

    if self.service_provider_catalog:
        for item in self.service_provider_catalog:
            spc.add(OSLC.serviceProviderCatalog, URIRef(item.about))

    if self.oauth_configuration:
        spc.add(OSLC.oauthConfiguration, URIRef(self.oauth_configuration.about))

    spc.add(OSLC.domain, JAZZ_PROCESS.uri)

    return spc
def to_rdf(self, graph):
    super(Compact, self).to_rdf(graph)

    uri = self.about if self.about else ''

    d = Resource(graph, URIRef(uri))
    d.add(RDF.type, OSLC.Compact)

    if self.icon:
        d.add(OSLC.icon, URIRef(self.icon))

    if self.short_title:
        d.add(OSLC.shortTitle, Literal(self.short_title, datatype=XSD.string))

    if self.title:
        d.add(DCTERMS.title, Literal(self.title, datatype=XSD.string))

    if self.small_preview:
        sp = self.small_preview.to_rdf(graph)
        d.add(OSLC.smallPreview, sp)

    if self.large_preview:
        sp = self.large_preview.to_rdf(graph)
        d.add(OSLC.largePreview, sp)

    return d
def convert_feed(self, csv_filename):
    read_feed = self.__open_file(csv_filename)
    for row in read_feed:
        feed = Resource(self.graph, URIRef(str.strip(row["publisher"])))
        feed.set(RDF.type, self.GTFS.Feed)
        feed.add(DCTERMS.publisher, Literal(str.strip(row["publisher"]), datatype=XSD.string))
        feed.add(DCTERMS.title, Literal(str.strip(row["feed_publisher_name"]), datatype=XSD.string))
        feed.add(DCTERMS.language, Literal(str.strip(row["feed_lang"]), datatype=XSD.string))
        if "feed_version" in row and str.strip(row["feed_version"]) != "":
            feed.add(self.SCHEMA.version, Literal(str.strip(row["feed_version"]), datatype=XSD.string))
        if "feed_start_date" in row and str.strip(row["feed_start_date"]) != "" \
                and "feed_end_date" in row and str.strip(row["feed_end_date"]) != "":
            temporal = Resource(self.graph, URIRef(feed.identifier + "_temporal"))
            # link the validity period to the feed (same pattern as convert_calendar)
            feed.set(DCTERMS.temporal, temporal)
            temporal.set(RDF.type, DCTERMS.temporal)
            temporal.add(self.SCHEMA.startDate, self.get_date_literal(str.strip(row["feed_start_date"])))
            temporal.add(self.SCHEMA.endDate, self.get_date_literal(str.strip(row["feed_end_date"])))
def _createProvenance(self, result):
    provdata = IProvenanceData(result)
    from rdflib import URIRef, Literal, Namespace, Graph
    from rdflib.namespace import RDF, RDFS, FOAF, DCTERMS, XSD
    from rdflib.resource import Resource
    PROV = Namespace(u"http://www.w3.org/ns/prov#")
    BCCVL = Namespace(u"http://ns.bccvl.org.au/")
    LOCAL = Namespace(u"urn:bccvl:")
    graph = Graph()
    # the user is our agent
    member = api.user.get_current()
    username = member.getProperty('fullname') or member.getId()
    user = Resource(graph, LOCAL['user'])
    user.add(RDF['type'], PROV['Agent'])
    user.add(RDF['type'], FOAF['Person'])
    user.add(FOAF['name'], Literal(username))
    user.add(FOAF['mbox'],
             URIRef('mailto:{}'.format(member.getProperty('email'))))
    # add software as agent
    software = Resource(graph, LOCAL['software'])
    software.add(RDF['type'], PROV['Agent'])
    software.add(RDF['type'], PROV['SoftwareAgent'])
    software.add(FOAF['name'], Literal('BCCVL ALA Importer'))
    # script content is stored somewhere on result and will be exported with zip?
    # ... or store along with pstats.json ? hidden from user
    # -> execenvironment after import -> log output?
    # -> source code ... maybe some link expression? stored on result ? separate entity?
    activity = Resource(graph, LOCAL['activity'])
    activity.add(RDF['type'], PROV['Activity'])
    # TODO: this is rather queued or created time for this activity ...
    #       could capture real start time on running status update (or start transfer)
    now = datetime.now().replace(microsecond=0)
    activity.add(PROV['startedAtTime'],
                 Literal(now.isoformat(), datatype=XSD['dateTime']))
    activity.add(PROV['hasAssociationWith'], user)
    activity.add(PROV['hasAssociationWith'], software)
    # add job parameters to activity
    provdata.data = graph.serialize(format="turtle")
def to_rdf(self, graph):
    super(Dialog, self).to_rdf(graph)

    d = Resource(graph, BNode())
    d.add(RDF.type, OSLC.Dialog)

    if self.label:
        d.add(OSLC.label, Literal(self.label))

    if self.title:
        d.add(DCTERMS.title, Literal(self.title))

    if self.hint_width:
        d.add(OSLC.hintWidth, Literal(self.hint_width))

    if self.hint_height:
        d.add(OSLC.hintHeight, Literal(self.hint_height))

    if self.dialog:
        d.add(OSLC.dialog, URIRef(self.dialog))

    if self.resource_type:
        for item in self.resource_type:
            d.add(OSLC.resourceType, URIRef(item))

    if self.usage:
        for item in self.usage:
            d.add(OSLC.usage, URIRef(item))

    return d
def to_rdf(self, graph):
    super(QueryCapability, self).to_rdf(graph)

    qc = Resource(graph, BNode())
    qc.add(RDF.type, OSLC.QueryCapability)

    if self.title:
        qc.add(DCTERMS.title, Literal(self.title))

    if self.label:
        qc.add(OSLC.label, Literal(self.label, datatype=XSD.string))
    else:
        qc.add(OSLC.label, Literal(self.title, datatype=XSD.string))

    if self.query_base:
        qc.add(OSLC.queryBase, URIRef(self.query_base))

    if self.resource_shape:
        qc.add(OSLC.resourceShape, URIRef(self.resource_shape))

    if self.resource_type:
        for item in self.resource_type:
            qc.add(OSLC.resourceType, URIRef(item))

    if self.usage:
        for item in self.usage.items():
            qc.add(OSLC.usage, URIRef(item[1]))

    return qc
            logging.debug(owl_sameas[match_id])
        else:
            # we want a way to catalog things that end up without any kind of match. Let's start here.
            if left_labels[key] not in unmatched:
                unmatched.append(left_labels[key])

out_graph = Graph()
nsm = NamespaceManager(out_graph)
nsm.bind('owl', OWL, override=False)

print("Count of matches: " + str(len(match_ids)))
for i in match_ids:
    osa = owl_sameas[i]
    if osa['score'] >= min_score:
        f = Resource(out_graph, URIRef(osa['left']))
        r = Resource(out_graph, URIRef(osa['right']))
        f.add(OWL.sameAs, r)
        r.add(OWL.sameAs, f)
    else:
        # The match was excluded by score
        print("Excluding " + str(osa) + " because the score was too low.")

out_graph.serialize(outfile, format=left_format)

if len(unmatched) > 0:
    if len(unmatched_deletions) > 0:
        unmatched = list(set(unmatched) - set(unmatched_deletions))
    for u in unmatched:
        print(u + " matched nothing in the target set.")
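# A self-contained sketch of the symmetric owl:sameAs linking performed above, using made-up
# URIs and an in-memory graph instead of the matcher's scored pairs and outfile.
from rdflib import Graph, URIRef
from rdflib.namespace import OWL
from rdflib.resource import Resource

g = Graph()
g.bind('owl', OWL)
left = Resource(g, URIRef('http://example.org/authority/term/1'))   # hypothetical URI
right = Resource(g, URIRef('http://example.org/local/concept/42'))  # hypothetical URI
left.add(OWL.sameAs, right)
right.add(OWL.sameAs, left)  # assert the link in both directions
print(g.serialize(format='turtle'))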
def generate_rdf(data):
    DCO = Namespace("http://info.deepcarbon.net/schema#")
    VIVO = Namespace("http://vivoweb.org/ontology/core#")
    SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
    BIBO = Namespace("http://purl.org/ontology/bibo/")

    g = Graph()
    g.bind("dco", DCO)
    g.bind("bibo", BIBO)
    g.bind("vivo", VIVO)

    for (uri, record) in data:
        try:
            publication = Publication(record)
            pub = Resource(g, URIRef(uri))
            if publication.issue is not None:
                pub.add(BIBO.issue, Literal(publication.issue))
            if publication.issued is not None and publication.issued["year"] is not None:
                pub.add(DCO.yearOfPublicationYear, Literal(publication.issued["year"], datatype=XSD.gYear))
            if publication.volume is not None:
                pub.add(BIBO.volume, Literal(publication.volume))
            if publication.issn is not None:
                pub.add(BIBO.issn, Literal(publication.issn))
            if publication.pages is not None:
                if "-" in publication.pages:
                    pageStart = publication.pages[:publication.pages.find("-")]
                    pageEnd = publication.pages[publication.pages.find("-") + 1:]
                    if pageStart != "n/a":
                        pub.add(BIBO.pageStart, Literal(pageStart))
                    if pageEnd != "n/a":
                        pub.add(BIBO.pageEnd, Literal(pageEnd))
                else:
                    #pub.add(BIBO.pages, Literal(publication.pages))
                    pass
        except ValueError as err:
            #print((uri, str(err)))
            pass

    with open("pub-info.ttl", "w") as out:
        out.write(g.serialize(format="turtle", encoding="UTF-8").decode(encoding="UTF-8"))
def convert_agency(self, csv_filename):
    read_agency = self.__open_file(csv_filename)
    for row in read_agency:
        if "agency_id" in row:
            agency = self.get_agency(str.strip(row["agency_id"]))
        else:
            agency = Resource(self.graph, URIRef(row['agency_url']))
            agency.add(RDF.type, self.GTFS.Agency)
        name = Literal(row['agency_name'], datatype=XSD.string)
        agency.add(FOAF.name, name)
        timezone = Literal(row['agency_timezone'], datatype=XSD.string)
        agency.add(self.GTFS.timeZone, timezone)
        if 'agency_lang' in row and str.strip(row["agency_lang"]) != "":
            agency.add(DCTERMS.language, Literal(row['agency_lang'], datatype=XSD.string))
        if 'agency_phone' in row and str.strip(row["agency_phone"]) != "":
            agency.add(FOAF.phone, Literal(row['agency_phone'], datatype=XSD.string))
        if 'agency_fare_url' in row and str.strip(row["agency_fare_url"]) != "":
            agency.add(self.GTFS.fareUrl, URIRef(row['agency_fare_url']))
def __iter__(self):
    """missing docstring."""
    for item in self.previous:
        # check if we have a dataset
        if item['_type'] not in ('org.bccvl.content.dataset',
                                 'org.bccvl.content.remotedataset'):
            # not a dataset
            yield item
            continue
        pathkey = self.pathkey(*item.keys())[0]
        # no path .. can't do anything
        if not pathkey:
            yield item
            continue

        path = item[pathkey]
        # Skip the Plone site object itself
        if not path:
            yield item
            continue

        obj = self.context.unrestrictedTraverse(
            path.encode().lstrip('/'), None)

        # FIXME: this is really not a great way to check where to find provenenace data
        # check if we are inside an experiment (means we import result)
        if IExperiment.providedBy(self.context.__parent__):
            # result import
            context = self.context
        else:
            # dataset import?
            context = obj

        # TODO: do some sanity checks
        provdata = IProvenanceData(context)
        PROV = Namespace(u"http://www.w3.org/ns/prov#")
        BCCVL = Namespace(u"http://ns.bccvl.org.au/")
        LOCAL = Namespace(u"urn:bccvl:")
        graph = Graph()
        graph.parse(data=provdata.data or '', format='turtle')
        activity = Resource(graph, LOCAL['activity'])
        # FIXME: shouldn't I use uuid instead of id?
        entity = Resource(graph, LOCAL[obj.id])
        # create this dataset as new entity -> output of activity
        entity.add(RDF['type'], PROV['Entity'])
        # generated by
        entity.add(PROV['wasGeneratedBy'], activity)
        # PROV['prov:wasAttributedTo'] to user and software?
        # File metadata
        entity.add(DCTERMS['creator'], Literal(obj.Creator()))
        entity.add(DCTERMS['title'], Literal(obj.title))
        entity.add(DCTERMS['description'], Literal(obj.description))
        entity.add(DCTERMS['rights'], Literal(obj.rights))
        if obj.portal_type == 'org.bccvl.content.dataset':
            entity.add(DCTERMS['format'], Literal(obj.file.contentType))
        else:
            # FIXME: this doesn't seem to do the right thing
            entity.add(DCTERMS['format'], Literal(obj.format))
        # TODO: add metadata about file?
        #       genre, layers, emsc, gcm, year
        # set activities end time; first one wins
        if activity.value(PROV['endedAtTime']) is None:
            activity.add(PROV['endedAtTime'],
                         Literal(datetime.now().replace(microsecond=0).isoformat(),
                                 datatype=XSD['dateTime']))
        # TODO: extend activity metadata with execution environment data
        #       (logfile import?, pstats import) .. and script + params.json file
        # ALA import url
        pd = item.get('_ala_provenance', {})
        if pd:
            entity.add(BCCVL['download_url'], Literal(pd['url']))
        # store prov data
        provdata.data = graph.serialize(format="turtle")

        yield item
def _createProvenance(self, result):
    provdata = IProvenanceData(result)
    from rdflib import URIRef, Literal, Namespace, Graph
    from rdflib.namespace import RDF, RDFS, FOAF, DCTERMS, XSD
    from rdflib.resource import Resource
    PROV = Namespace(u"http://www.w3.org/ns/prov#")
    BCCVL = Namespace(u"http://ns.bccvl.org.au/")
    LOCAL = Namespace(u"urn:bccvl:")
    graph = Graph()
    # the user is our agent
    member = api.user.get_current()
    username = member.getProperty('fullname') or member.getId()
    user = Resource(graph, LOCAL['user'])
    user.add(RDF['type'], PROV['Agent'])
    user.add(RDF['type'], FOAF['Person'])
    user.add(FOAF['name'], Literal(username))
    user.add(FOAF['mbox'],
             URIRef('mailto:{}'.format(member.getProperty('email'))))
    # add software as agent
    software = Resource(graph, LOCAL['software'])
    software.add(RDF['type'], PROV['Agent'])
    software.add(RDF['type'], PROV['SoftwareAgent'])
    software.add(FOAF['name'], Literal('BCCVL Job Script'))
    # script content is stored somewhere on result and will be exported with zip?
    # ... or store along with pstats.json ? hidden from user
    # -> execenvironment after import -> log output?
    # -> source code ... maybe some link expression? stored on result ? separate entity?
    activity = Resource(graph, LOCAL['activity'])
    activity.add(RDF['type'], PROV['Activity'])
    # TODO: this is rather queued or created time for this activity ...
    #       could capture real start time on running status update (or start transfer)
    now = datetime.now().replace(microsecond=0)
    activity.add(PROV['startedAtTime'],
                 Literal(now.isoformat(), datatype=XSD['dateTime']))
    activity.add(PROV['hasAssociationWith'], user)
    activity.add(PROV['hasAssociationWith'], software)
    # add job parameters to activity
    for idx, (key, value) in enumerate(result.job_params.items()):
        param = Resource(graph, LOCAL[u'param_{}'.format(idx)])
        activity.add(BCCVL['algoparam'], param)
        param.add(BCCVL['name'], Literal(key))
        # We have only dataset references as parameters
        if key in ('data_table',):
            # dataset parameters point at the dataset entity by its uuid
            param.add(BCCVL['value'], LOCAL[value])
        else:
            param.add(BCCVL['value'], Literal(value))
    # iterate over all input datasets and add them as entities
    for key in ('data_table',):
        dsbrain = uuidToCatalogBrain(result.job_params[key])
        if not dsbrain:
            continue
        ds = dsbrain.getObject()
        dsprov = Resource(graph, LOCAL[result.job_params[key]])
        dsprov.add(RDF['type'], PROV['Entity'])
        #dsprov.add(PROV['..'], Literal(''))
        dsprov.add(DCTERMS['creator'], Literal(ds.Creator()))
        dsprov.add(DCTERMS['title'], Literal(ds.title))
        dsprov.add(DCTERMS['description'], Literal(ds.description))
        dsprov.add(DCTERMS['rights'], Literal(ds.rights))  # ds.rightsstatement
        dsprov.add(DCTERMS['format'], Literal(ds.file.contentType))
        # location / source
        # graph.add(uri, DCTERMS['source'], Literal(''))
        # TODO: genre ...
        # TODO: resolution
        # species metadata
        md = IBCCVLMetadata(ds)
        # dsprov.add(BCCVL['scientificName'], Literal(md['species']['scientificName']))
        # dsprov.add(BCCVL['taxonID'], URIRef(md['species']['taxonID']))
        # ... species data, ... species id
        for layer in md.get('layers_used', ()):
            dsprov.add(BCCVL['layer'], LOCAL[layer])
        # link with activity
        activity.add(PROV['used'], dsprov)

    provdata.data = graph.serialize(format="turtle")
def to_rdf(self, graph):
    super(ServiceProvider, self).to_rdf(graph)

    uri = self.about if self.about.__contains__(self.identifier) \
        else self.about + '/{}'.format(self.identifier) if self.identifier else ''

    sp = Resource(graph, URIRef(uri))
    sp.add(RDF.type, OSLC.ServiceProvider)

    if self.identifier:
        sp.add(DCTERMS.identifier, Literal(self.identifier, datatype=XSD.string))

    if self.title:
        sp.add(DCTERMS.title, Literal(self.title, datatype=XSD.string))

    if self.description:
        sp.add(DCTERMS.description, Literal(self.description))

    if self.publisher:
        sp.add(DCTERMS.publisher, URIRef(self.publisher.about))

    if self.service:
        for s in self.service:
            r = s.to_rdf(graph)
            sp.add(OSLC.service, r)

    if self.details:
        sp.add(OSLC.details, URIRef(self.details))

    if self.oauth_configuration:
        sp.add(OSLC.oauthConfiguration, URIRef(self.oauth_configuration.about))

    if self.prefix_definition:
        for pd in self.prefix_definition:
            r = pd.to_rdf(graph)
            sp.add(OSLC.prefixDefinition, r)

    sp.add(JAZZ_PROCESS.supportContributionsToLinkIndexProvider, Literal(True, datatype=XSD.boolean))
    sp.add(JAZZ_PROCESS.supportLinkDiscoveryViaLinkIndexProvider, Literal(True, datatype=XSD.boolean))
    sp.add(JAZZ_PROCESS.supportOSLCSimpleQuery, Literal(True, datatype=XSD.boolean))
    sp.add(JAZZ_PROCESS.globalConfigurationAware, Literal('yes', datatype=XSD.string))

    return sp
def to_rdf(self, graph):
    super(Service, self).to_rdf(graph)

    # uri = self.about if self.about.__contains__(self.identifier) else self.about + '/{}'.format(
    #     self.identifier) if self.identifier else ''

    s = Resource(graph, BNode())
    s.add(RDF.type, OSLC.Service)

    if self.title:
        s.add(DCTERMS.title, Literal(self.title, datatype=XSD.string))

    if self.description:
        s.add(DCTERMS.description, Literal(self.description, datatype=XSD.string))

    if self.domain:
        s.add(OSLC.domain, URIRef(self.domain))

    if self.creation_factory:
        for cf in self.creation_factory:
            r = cf.to_rdf(graph)
            s.add(OSLC.creationFactory, r)

    if self.query_capability:
        for qc in self.query_capability:
            r = qc.to_rdf(graph)
            s.add(OSLC.queryCapability, r)

    if self.selection_dialog:
        for sd in self.selection_dialog:
            r = sd.to_rdf(graph)
            s.add(OSLC.selectionDialog, r)

    if self.creation_dialog:
        for cd in self.creation_dialog:
            r = cd.to_rdf(graph)
            s.add(OSLC.creationDialog, r.identifier)

    return s
nsm = NamespaceManager(g)

# r_namespace should == p_namespace, otherwise how can they really be reciprocals?
(p_namespace, p_verb) = predicate.split(":")
(r_namespace, r_verb) = reciprocal.split(":")
assert p_namespace == r_namespace, "Prefixes don't match. This would probably create weird reciprocations."

# find out if the namespaces are already registered via the graph, because they *should* be
# I wonder if there's a more efficient way of doing this.
# Namespaces aren't usually prohibitively numerous, but still.
nuri = ''
for (p, n) in g.namespaces():
    if p == p_namespace:
        nuri = Namespace(n)
        nsm.bind(p, nuri, override=False)
if nuri == '':
    # we lack a proper prefix/namespace definition and can't continue
    sys.exit("\nError: Can't find the prefix in the namespace list for this graph. Are you sure it's there?\n")

found_p = getattr(nuri, p_verb)
found_r = getattr(nuri, r_verb)

for (s, o) in g.subject_objects(predicate=found_p):
    new_subject = Resource(g, o)
    new_object = Resource(g, s)
    new_subject.add(found_r, new_object)

g.serialize(outfile, guess_format(infile))
def to_rdf(self, graph):
    super(CreationFactory, self).to_rdf(graph)

    cf = Resource(graph, BNode())
    cf.add(RDF.type, OSLC.CreationFactory)

    if self.title:
        cf.add(DCTERMS.title, Literal(self.title))

    if self.label:
        cf.add(OSLC.label, Literal(self.label, datatype=XSD.string))

    if self.creation:
        cf.add(OSLC.creation, URIRef(self.creation))

    if self.resource_shape:
        for item in self.resource_shape:
            cf.add(OSLC.resourceShape, URIRef(item))

    if self.resource_type:
        for item in self.resource_type:
            cf.add(OSLC.resourceType, URIRef(item))

    if self.usage:
        for item in self.usage:
            cf.add(OSLC.usage, URIRef(item))

    return cf
def to_rdf(self, graph):
    super(Publisher, self).to_rdf(graph)

    graph.bind('jfs', JFS)

    p = Resource(graph, URIRef(self.about))
    p.add(RDF.type, DCTERMS.Publisher)

    if self.title:
        p.add(DCTERMS.title, Literal(self.title))

    if self.label:
        p.add(OSLC.label, Literal(self.label))

    if self.identifier:
        p.add(DCTERMS.identifier, Literal(self.identifier))

    if self.icon:
        p.add(OSLC.icon, URIRef(self.icon))

    p.add(JFS.nonLocalizedTitle, Literal('Configuration'))
    p.add(JFS.version, Literal('7.0'))
    p.add(JFS.instanceName, Literal('/pyoslc'))

    return p
def convert_stop_times(self, csv_filename):
    read_stop_times = self.__open_file(csv_filename)
    for row in read_stop_times:
        stop_id = str.strip(row["stop_id"])
        sequence_num = str.strip(row["stop_sequence"])
        trip_id = str.strip(row["trip_id"])
        stop_time = Resource(
            self.graph,
            URIRef(self.uri + trip_id + "_" + stop_id + "_StopTime_" + sequence_num))
        stop_time.set(RDF.type, self.GTFS.StopTime)
        stop_time.add(self.GTFS.trip, self.get_trip(trip_id))
        stop_time.add(self.GTFS.arrivalTime, Literal(str.strip(row["arrival_time"]), datatype=XSD.time))
        stop_time.add(self.GTFS.departureTime, Literal(str.strip(row["departure_time"]), datatype=XSD.time))
        stop_time.add(self.GTFS.stop, self.get_stop(stop_id))
        stop_time.add(self.GTFS.stopSequence, Literal(sequence_num, datatype=XSD.nonNegativeInteger))
        if "stop_headsign" in row:
            stop_time.add(self.GTFS.headsign, Literal(str.strip(row["stop_headsign"]), datatype=XSD.string))
        if "pickup_type" in row:
            pickup_type = self.get_stop_type(str.strip(row["pickup_type"]))
            stop_time.add(self.GTFS.pickupType, pickup_type)
        if "drop_off_type" in row:
            dropoff_type = self.get_stop_type(str.strip(row["drop_off_type"]))
            stop_time.add(self.GTFS.dropOffType, dropoff_type)
        if "shape_dist_traveled" in row:
            # stop_time.add(self.GTFS.distanceTraveled,
            #               Literal(float(str.strip(row["shape_dist_traveled"])), datatype=XSD.nonNegativeInteger))
            stop_time.add(self.GTFS.distanceTraveled, Literal(float(str.strip(row["shape_dist_traveled"]))))
def get_agency(self, agency_id):
    agency = Resource(self.graph, URIRef(self.uri + "agency_" + agency_id))
    agency.add(RDF.type, self.GTFS.Agency)
    agency.set(DCTERMS.identifier, Literal(agency_id, datatype=XSD.string))
    return agency
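# A small, self-contained illustration of why get_agency is safe to call repeatedly for the
# same agency_id: Resource.add asserts a triple (duplicates collapse because a Graph is a set
# of triples), while Resource.set first removes any existing values for the predicate, so the
# resource keeps exactly one dcterms:identifier. The agency URI below is made up for the demo.
from rdflib import Graph, Literal, URIRef
from rdflib.namespace import DCTERMS, XSD
from rdflib.resource import Resource

g = Graph()
agency = Resource(g, URIRef("http://example.org/agency_CT"))  # hypothetical agency URI
agency.set(DCTERMS.identifier, Literal("CT", datatype=XSD.string))
agency.set(DCTERMS.identifier, Literal("CT", datatype=XSD.string))  # still exactly one identifier triple
print(len(g))  # -> 1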