def _split_uri(self, identifier):
    """Split a URIRef into (namespace, resource_id).

    Heuristics, applied in order: query strings fall back to rdflib's
    ``split_uri``; a fragment splits at ``#``; otherwise the URL path is
    split on ``/``; a path without ``/`` is the whole resource id.

    :param identifier: a rdflib URIRef
    :return: (namespace, resource_id) tuple
    :raises ValueError: if *identifier* is not a URIRef
    """
    if isinstance(identifier, URIRef):
        scheme, netloc, path, query, fragment = urlsplit(identifier)
        if query:
            # Query string present: delegate the split to rdflib.
            namespace, resource_id = split_uri(identifier)
        if fragment:
            # if we have a fragment, we will split there
            # NOTE(review): this if/elif chain can overwrite the query-based
            # split above — presumably fragment/path take precedence; confirm.
            namespace, resource_id = urldefrag(identifier)
            namespace += "#"
        elif "/" in path and len(path) > 1:
            splits = path.split("/")
            if path.endswith("/"):
                # Trailing slash: the id keeps the last two segments
                # (last real segment plus the empty trailing one).
                resource_id = "/".join(splits[-2:])
                path = "/".join(splits[:-2]) + "/"
                namespace = urlunsplit((scheme, netloc, path, "", ""))
            else:
                resource_id = "/".join(splits[-1:])
                path = "/".join(splits[:-1]) + "/"
                namespace = urlunsplit((scheme, netloc, path, "", ""))
        elif path:
            # Path with no '/': the whole path is the resource id.
            resource_id = path
            namespace = urlunsplit((scheme, netloc, "", "", ""))
        else:
            # No path at all: let rdflib decide.
            namespace, resource_id = split_uri(identifier)
        log.debug("Split %s to %s, %s" % (identifier, namespace, resource_id))
        return namespace, resource_id
    else:
        raise ValueError("Unknown identifier type %r" % identifier)
def connect_catalysis(self, control,places): """ this function connects the 2 catalysis places to the transitions which are part of the reaction they point to """ #get transitions transitions = self.net.get_transition(split_uri(control.controlled)[1]) if len(transitions) ==1: existing_transition = transitions[0] new_transition = self.net.create_transition(existing_transition.id+"_"+split_uri(control.participant)[1],Direction.reverse(existing_transition.direction),existing_transition.control) #add arcs to controller self.connect_both_ways(new_transition,places[1]) #add arcs to places #get arcs from existing_transition #Here it goes wrong... arcs = self.net.get_arcs(existing_transition) #reverse direction and connect to new_transition for arc in arcs: if arc.source.id == existing_transition.id and arc.target.id != split_uri(control.controlledId)[1]: self.net.create_arc(arc.target, new_transition) if arc.target.id == existing_transition.id and arc.source.id != split_uri(control.controlledId)[1]: self.net.create_arc(new_transition, arc.source) else: print("Error: More than 1 transition found")
def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj): """ Cycles through triples for a particular subject and adds them to the nidm_obj :param rdf_graph: RDF graph object :param subject_uri: URI of subject to query for additional metadata :param namespaces: Namespaces in NIDM document :param nidm_obj: NIDM object to add metadata :return: None """ #Cycle through remaining metadata and add attributes for predicate, objects in rdf_graph.predicate_objects(subject=subject_uri): #if find qualified association if predicate == URIRef(Constants.PROV['qualifiedAssociation']): #need to get associated prov:Agent uri, add person information to graph for agent in rdf_graph.objects( subject=subject_uri, predicate=Constants.PROV['wasAssociatedWith']): #add person to graph and also add all metadata person = nidm_obj.add_person(uuid=agent) #now add metadata for person add_metadata_for_subject(rdf_graph=rdf_graph, subject_uri=agent, namespaces=namespaces, nidm_obj=person) #get role information for bnode in rdf_graph.objects( subject=subject_uri, predicate=Constants.PROV['qualifiedAssociation']): #for bnode, query for object which is role? How? #term.BNode.__dict__() #create temporary resource for this bnode r = Resource(rdf_graph, bnode) #get the object for this bnode with predicate Constants.PROV['hadRole'] for r_obj in r.objects(predicate=Constants.PROV['hadRole']): #create qualified names for objects obj_nm, obj_term = split_uri(r_obj._identifier) for uris in namespaces: if uris.uri == URIRef(obj_nm): #create qualified association in graph nidm_obj.add_qualified_association( person=person, role=pm.QualifiedName(uris, obj_term)) else: if validators.url(objects): #create qualified names for objects obj_nm, obj_term = split_uri(objects) for uris in namespaces: if uris.uri == URIRef(obj_nm): #prefix = uris.prefix nidm_obj.add_attributes( {predicate: pm.QualifiedName(uris, obj_term)}) else: nidm_obj.add_attributes( {predicate: get_RDFliteral_type(objects)})
def concept_path(subject: URIRef) -> str:
    """
    Convert subject into an i2b2 concept path fragment.
    Example: Patient.status --> Patient\\status\\
    :param subject: FHIR URI
    :return: i2b2 path fragment
    """
    # Fix: reuse the local name instead of calling split_uri a second time.
    subj_path = split_uri(subject)[1]
    if is_w5_uri(subject):
        # W5 URIs keep only the last dotted component.
        return (subj_path.rsplit('.', 1)[1] if '.' in subj_path else subj_path) + '\\'
    # Non-W5: every dotted component becomes a path segment.
    return subj_path.replace('.', '\\') + '\\'
def render_element(self, element: Element) -> str:
    """Render *element* as an HTML ``<div>``: an anchored heading line,
    optional statements, then optional lists of names, handles, prototypes,
    members and clusters.

    :param element: the element to render
    :return: HTML string (newline-joined lines)
    """
    html_lines = ["<div>"]
    # Strip the namespace part of hash-URIs for display.
    if element.element_type and "#" in element.element_type:
        _, element_type = split_uri(element.element_type)
    else:
        element_type = element.element_type
    if "#" in element.element_id:
        _, element_id = split_uri(element.element_id)
    else:
        element_id = element.element_id
    justifications = element.informative_justifications + element.justified_by
    # The anchor uses the full id so in-page links can target it;
    # only the shortened id is displayed.
    element_anchor = f'<u><span id="{element.element_id}">{element_id} ({element_type})</span></u>'
    html_lines.append(f"{element_anchor}: {self.render_justifications(justifications)}")
    statement_list = self.render_statements(element.statements)
    if element.statements:
        html_lines.append(statement_list)
    if (element.names or element.handles or element.prototypes or element.members or element.clusters):
        html_lines.append("<ul>")
        if element.names:
            html_lines.append(f"<li>names: {', '.join(element.names)}</li>")
        if element.handles:
            html_lines.append(f"<li>handles: {', '.join(element.handles)}</li>")
        if element.prototypes:
            html_lines.append(f"<li>prototypes: {', '.join([self.anchor_link(p) for p in element.prototypes])}</li>")
        if element.members:
            html_lines.append(f"<li>members: {', '.join([self.anchor_link(p) for p in element.members])}</li>")
        if element.clusters:
            html_lines.append(f"<li>clusters: {', '.join([self.anchor_link(p) for p in element.clusters])}</li>")
        html_lines.append("</ul>")
    html_lines.append("</div><br>")
    return "\n".join(html_lines)
def normalize_node_id(node_id, prefix_key='pos'):
    """Prefix a node URI's local name with a regulation-direction marker.

    (URIRef, str) -> unicode URI whose local name is prefixed with
    POSITIVE_PREFIX (default) or NEGATIVE_PREFIX (``prefix_key == 'neg'``).
    Ids that already carry either prefix are returned unchanged.
    """
    node_id = unicode(node_id)
    prefix = NEGATIVE_PREFIX if prefix_key == 'neg' else POSITIVE_PREFIX
    # Idiom fix: `x in s` instead of `not s.find(x) == -1`.
    if POSITIVE_PREFIX in node_id or NEGATIVE_PREFIX in node_id:
        return node_id
    # assume that all ids are URIs of type `http://muliscale.ch/label`
    ns, qname = namespace.split_uri(node_id)
    return ''.join([ns, prefix, qname])
def write_to_dir(self, output_file_name: str = "visualization.html"):
    """Render all elements grouped by type and write the HTML page plus its
    stylesheet into ``self.output_dir``.

    :param output_file_name: name of the HTML file created in the output directory
    :raises ValueError: if ``output_dir`` exists but is not a directory
    """
    if self.output_dir.exists() and not self.output_dir.is_dir():
        raise ValueError("argument `output_dir` must be directory.")
    html_file = self.output_dir / output_file_name
    html_lines = [
        "<html>",
        '<head><link rel="stylesheet" href="style.css"></head>',
        "<body>",
    ]
    element_list_by_type = defaultdict(list)
    for element in self.elements.values():
        # Strip the namespace of hash-URIs so headings show the short type name.
        if element.element_type and "#" in element.element_type:
            _, element_type = split_uri(element.element_type)
        else:
            element_type = element.element_type
        element_list_by_type[element_type].append(element)
    for element_type, element_list in element_list_by_type.items():
        html_lines.append(f"<h1>{element_type}</h1>")
        for element in sorted(element_list, key=lambda e: e.element_id):
            html_lines.append(self.render_element(element))
    # BUG FIX: close tags in proper nesting order (</body> before </html>);
    # the original emitted ["</html>", "</body>"].
    html_lines.extend(["</body>", "</html>"])
    html_file.write_text("\n".join(html_lines))
    style_file = self.output_dir / "style.css"
    style_file.write_text(STYLE)
def denote_things(self, mention: Mention, annotation: Annotation):
    """Add a ``gaf:denotedBy`` triple linking the annotation's instance to the
    mention, persist the graph as TriG under ``interpretations_path``, and
    return the serialized graph.

    :param mention: mention whose id names the mention URI
    :param annotation: entity annotation; its value's id is the instance URI
    :return: the TriG serialization of the accumulated graph, as str
    :raises ValueError: if the annotation type is not an entity annotation
    """
    if str(annotation.type).upper() not in ENTITY_ANNOTATIONS:
        raise ValueError(f"Cannot denote {annotation} of type {annotation.type}")
    # Create and bind namespaces
    # TODO this will be much easier once we have the full brain functionality
    ltalk_ns = Namespace('http://cltl.nl/leolani/talk/')
    self.interpretations_graph.bind('leolaniTalk', ltalk_ns)
    gaf_ns = Namespace('http://groundedannotationframework.org/gaf#')
    self.interpretations_graph.bind('gaf', gaf_ns)
    # Create triple
    mention_uri = ltalk_ns[mention.id]
    instance_uri = annotation.value.id
    self.interpretations_graph.add((instance_uri, gaf_ns['denotedBy'], mention_uri))
    # Save to file but return the string representation
    os.makedirs(f'{self.interpretations_path}', exist_ok=True)
    id = split_uri(instance_uri)[-1]  # local name; NOTE: shadows builtin `id`
    with open(f'{self.interpretations_path}/annotation_{id}.trig', 'wb') as f:
        self.interpretations_graph.serialize(f, format="trig")
    data = self.interpretations_graph.serialize(format="trig")
    # TODO we return the serialized graph with the new triples. TBD if we want to
    # a) create a new graph per annotation, VS accumulate on the same graph
    # b) return the triples or save them
    return data.decode("utf-8")
def values(self, property, namespace=None, language=None, localName=False):
    """Return all objects of *property* among this resource's triples.

    :param property: predicate (str or URIRef); a bare name is resolved
        against *namespace* when one is given
    :param namespace: optional namespace for building the predicate URI
    :param language: if given, keep only literals with this language tag
    :param localName: if True, return literal values / URI local names
        instead of the raw terms
    :return: list of matching objects
    """
    from rdflib import URIRef
    if not isinstance(property, URIRef):
        if namespace:
            n = Namespace(namespace)
            predicate = n.term(property)
        else:
            predicate = URIRef(property)
    else:
        predicate = property
    if language:
        # Robustness fix: only Literals carry a `language` attribute;
        # getattr keeps URIRefs/BNodes from raising AttributeError.
        result = [o for s, p, o in self.__triples
                  if p == predicate and getattr(o, "language", None) == language]
    else:
        result = [o for s, p, o in self.__triples if p == predicate]
    if localName:
        from rdflib import Literal
        aux = []
        for x in result:
            if isinstance(x, Literal):
                aux.append(x.value)
            else:
                aux.append(split_uri(x)[1])
        result = aux
    return result
def __init__(self, encounterURI: URIRef, patient_id: str, patient_ide_source: str) -> None:
    """
    Create a new encounter mapping entry
    :param encounterURI: URI of the encounter
    :param patient_id: Associated patient identifier
    :param patient_ide_source: Associated patient identifier source
    """
    # EncounterMapping rows produced by this instance.
    self.encounter_mapping_entries = []
    parsed_resource = parse_fhir_resource_uri(encounterURI)
    resource_namespace = str(parsed_resource.namespace)
    # External identifier of the form "<ResourceType>/<resource id>".
    resource_ide = split_uri(parsed_resource.resource_type)[1] + '/' + parsed_resource.resource
    key = (resource_ide, resource_namespace, self.project_id, patient_id, patient_ide_source)
    if key in self.number_map:
        # Encounter already numbered: reuse the number, emit no new row.
        self.encounter_num = self.number_map[key]
    else:
        self.encounter_num = self.number_generator.new_number()
        pm = EncounterMapping(resource_ide, resource_namespace, self.project_id, self.encounter_num, patient_id, patient_ide_source, EncounterIDEStatus.active)
        self.number_map[key] = self.encounter_num
        self.encounter_mapping_entries.append(pm)
    # Identity mapping: the encounter number mapped to itself in the identity source.
    identity_id = str(self.encounter_num)
    ikey = (identity_id, self.identity_source_id, self.project_id)
    if ikey not in self.number_map:
        # NOTE(review): ikey is never inserted into number_map here, so this
        # guard can only be satisfied by an external writer — confirm whether
        # the identity row is meant to be deduplicated.
        ipm = EncounterMapping(identity_id, self.identity_source_id, self.project_id, self.encounter_num, patient_id, patient_ide_source, EncounterIDEStatus.active)
        self.encounter_mapping_entries.append(ipm)
def get_cluster_list(type_=None, limit=10, offset=0, sortby='size'):
    """Yield ClusterSummary(uri, href, label, member_count) for clusters of
    *type_*, ordered per *sortby*.

    The query template carries `label_string` / `order_by` placeholders that
    are substituted per type; LIMIT/OFFSET are appended when truthy.
    """
    query = """
    SELECT ?cluster ?label (COUNT(?member) AS ?memberN)
    WHERE {
      ?cluster aida:prototype ?prototype .
      ?prototype a ?type .
      label_string
      ?membership aida:cluster ?cluster ;
                  aida:clusterMember ?member .
    }
    GROUP BY ?cluster ?label
    ORDER BY order_by
    """
    if type_ == AIDA.Entity:
        query = query.replace('?type', type_.n3())
        query = query.replace('label_string', '?prototype aida:hasName ?label .')
        query = query.replace('order_by', 'DESC(?memberN)')
    if type_ == AIDA.Event:
        query = query.replace('?type', type_.n3())
        # Events take their label from the reified rdf:type statement.
        query = query.replace('label_string', '?s rdf:subject ?prototype ; rdf:predicate rdf:type ; rdf:object ?label .')
        if sortby == 'type':
            query = query.replace('order_by', '?label DESC(?memberN)')
        else:
            query = query.replace('order_by', 'DESC(?memberN) ?label')
    # NOTE(review): for any other type_ the placeholders stay unsubstituted,
    # which yields invalid SPARQL — confirm callers only pass Entity/Event.
    if limit:
        query += " LIMIT " + str(limit)
    if offset:
        query += " OFFSET " + str(offset)
    for u, l, c in sparql.query(query, namespaces):
        if isinstance(l, URIRef):
            # URI labels are shortened to their local name.
            _, l = split_uri(l)
        yield ClusterSummary(u, u.replace('http://www.isi.edu/gaia', '/cluster').replace('http://www.columbia.edu', '/cluster'), l, c)
def create_places(self, control):
    """
    Creates 2 catalysis places, or adds a new place if it is already existing
    :rtype: Set(Place)
    """
    controlled_id = split_uri(control.controlledId)[1]
    # An absent controlled name degrades to the empty string.
    label = control.controlledName if control.controlledName is not None else ""
    return [
        self.net.create_place(controlled_id),
        self.net.create_place(controlled_id + "_COMPETITIVE", label + "*"),
    ]
def turtle(test):
    """Run one W3C Turtle test case (Python 2 harness): parse ``test.action``
    and, for eval tests, compare against the expected N-Triples in
    ``test.result``. Parse errors are swallowed for negative-syntax tests
    and re-raised otherwise.
    """
    g = Graph()
    try:
        base = 'http://www.w3.org/2013/TurtleTests/' + split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='turtle')
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")
        if test.result:  # eval test
            res = Graph()
            res.parse(test.result, format='nt')
            if verbose:
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return
                print "Diff:"
                #print "%d triples in both"%len(both)
                print "Turtle Only:"
                for t in first:
                    print t
                print "--------------------"
                print "NT Only"
                for t in second:
                    print t
                raise Exception('Graphs do not match!')
            assert isomorphic(g, res), 'graphs must be the same'
    except:
        # NOTE(review): bare except also swallows the AssertionError raised
        # when a negative-syntax test unexpectedly parses — confirm intended.
        if test.syntax:
            raise
def generate_bio2vec_frmt(self, model, node_dict, outdir):
    """Write embeddings to <outdir>/embeddings.bio2vec.tsv in bio2vec format.

    :param model: word2vec-style model whose ``wv.vocab`` keys are numeric node ids
    :param node_dict: mapping of node URI/name -> numeric id (inverted here)
    :param outdir: output directory
    """
    logger.info("Started generating bio2vec dataset file")
    id2node_map = {v: k for k, v in node_dict.items()}
    entity_emb = model
    sep = ','
    outFile = join(outdir, "embeddings.bio2vec.tsv")
    # Renamed handle from `file` to avoid shadowing the builtin.
    with open(outFile, 'w') as out_fh:
        writer = csv.writer(out_fh, delimiter='\t')
        for key in entity_emb.wv.vocab:
            # Vocabulary keys must be numeric ids known to the node map.
            if not key.isdigit():
                print("key not found:", key)
                continue
            if int(key) not in id2node_map:
                print("key not found:", key)
                continue
            node = id2node_map[int(key)]
            local_name = ''
            entity_type = 'entity'
            try:
                uri = node[1:-1]  # strip surrounding angle brackets
                entity_type = find_type(uri)
                node = uri
                local_name = split_uri(uri)[1]
            except Exception:
                # Best-effort enrichment: fall back to bare node / defaults.
                pass
            row = [node, local_name, '', '', entity_type, sep.join(map(str, entity_emb[key]))]
            writer.writerow(row)
    logger.info("Finished generating bio2vec dataset file")
def get_class(self, class_uri):
    """Build (or fetch) a Python class object for the RDF class *class_uri*.

    Base classes are resolved recursively; classes without bases derive from
    Thing. If the URI is not yet a class in the store it is created first.
    :return: the created class, or None if creation failed
    """
    class_uri = URIRef(class_uri)
    if self.query.is_class(class_uri):
        base_class_uris = self.query.get_base_classes(class_uri)
        if base_class_uris:
            base_classes = [self.get_class(cl) for cl in base_class_uris]
        else:
            base_classes = [Thing]
        namespace, classname = split_uri(unicode(class_uri))
        cl = ClassCreator(str(classname), tuple(base_classes), {
            "uri": class_uri,
            "factory": self,
        })
        return cl
    # no such class? create it!
    else:
        if self.query.create_class(class_uri):
            return self.get_class(class_uri)
        else:
            return None
def entity_relations(self):
    """Yield (relation_type, obj, label) for every relation this entity
    participates in; the role predicate's leading "<Type>_" prefix is
    stripped before use.
    """
    # BUG FIX: SPARQL requires the two-word "GROUP BY" keyword; the original
    # "groupby" is a syntax error under the SPARQL 1.1 grammar.
    query = """
    SELECT ?relation ?pred2 ?obj2 ?relation_type (min(?lbl) as ?label)
    WHERE {
        ?relation a aida:Relation .
        ?s1 rdf:subject ?relation ;
            rdf:predicate ?pred ;
            rdf:object ?obj .
        ?s2 rdf:subject ?relation ;
            rdf:predicate rdf:type ;
            rdf:object ?relation_type .
        ?s3 rdf:subject ?relation ;
            rdf:predicate ?pred2 ;
            rdf:object ?obj2 .
        OPTIONAL {?obj2 aida:hasName ?lbl}
        filter(?s3 != ?s2 && ?s3 != ?s1)
    }
    GROUP BY ?relation ?pred2 ?obj2 ?relation_type
    """
    for relation, pred, obj, relation_type, label in self.model.sparql.query(
            query, namespaces, {'obj': self.uri}):
        _, relation_type = split_uri(relation_type)
        # Strip the "<Type>_" prefix from the role predicate.
        ind = pred.find('_')
        pred = pred[ind + 1:]
        yield relation_type, obj, label
def context_resolve(self, field_uri: str) -> str:
    """
    According to field_uri to add corresponding context and return a resolvable field_name
    :param field_uri:
    :return: a field_name that can be resolved with kg's @context
    """
    from rdflib.namespace import split_uri
    # Ensure the document has an @context dict to record prefixes into.
    context = self._kg["@context"] = self._kg.get("@context", dict())
    nm = self.ontology.g.namespace_manager
    space, name = split_uri(field_uri)
    if "@vocab" not in context and None in nm.namespaces():
        # NOTE(review): @vocab is set to the *prefix* bound to `space`, yet it
        # is compared against `space` (a namespace URI) below — confirm.
        context["@vocab"] = nm.store.prefix(space)
    if "@vocab" in context and space == context["@vocab"]:
        # case #1, can directly use name
        return name
    if self.schema.has_field(name):
        # Known schema field: record its full URI under the bare name.
        if name not in context:
            context[name] = field_uri
        return name
    prefix = nm.store.prefix(space)
    if prefix:
        # Known namespace: register the prefix and return prefix:name form.
        context[prefix] = space
        return nm.qname(field_uri)
    # Unknown namespace: fall back to the full URI.
    return field_uri
def _get_rdf_identified(self, graph, identity):
    """Collect the standard SBOL 'identified' fields plus non-top-level
    annotations for *identity* from *graph*.

    :param graph: rdflib Graph to read from
    :param identity: subject URI (URIRef or str)
    :return: dict with identity/display_id/was_derived_from/version/
             description/name and an `annotations` list
    """
    c = {}
    c['identity'] = identity.toPython() if type(identity) is not str else identity
    c['display_id'] = self._get_triplet_value(graph, identity, SBOL.displayId)
    c['was_derived_from'] = self._get_triplet_value(graph, identity, PROV.wasDerivedFrom)
    c['version'] = self._get_triplet_value(graph, identity, SBOL.version)
    c['description'] = self._get_triplet_value(graph, identity, DCTERMS.description)
    c['name'] = self._get_triplet_value(graph, identity, DCTERMS.title)
    # Reverse map namespace URI -> prefix for building qualified names.
    flipped_namespaces = {v: k for k, v in self._namespaces.items()}
    # Get annotations (non top level)
    c['annotations'] = []
    for triple in graph.triples((identity, None, None)):
        namespace, obj = split_uri(triple[1])
        # NOTE(review): KeyError here means a predicate namespace missing from
        # self._namespaces — confirm all predicates are registered.
        prefix = flipped_namespaces[namespace]
        as_string = '{}:{}'.format(prefix, obj)
        if as_string not in VALID_ENTITIES:
            q_name = QName(namespace=namespace, local_name=obj, prefix=prefix)
            if isinstance(triple[2], URIRef):
                value = AnnotationValue(uri=triple[2].toPython())
            elif isinstance(triple[2], Literal):
                value = AnnotationValue(literal=triple[2].toPython())
            else:
                # Blank-node objects carry no annotation value.
                value = None
            c['annotations'].append(Annotation(q_name=q_name, annotation_value=value))
    return c
def turtle(test):
    """Run one W3C Turtle test case: parse ``test.action`` and, for eval
    tests, compare against the expected N-Triples in ``test.result``.
    Parse errors are swallowed for negative-syntax tests and re-raised
    otherwise.
    """
    g = Graph()
    try:
        base = 'http://www.w3.org/2013/TurtleTests/' + split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='turtle')
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")
        if test.result:  # eval test
            res = Graph()
            res.parse(test.result, format='nt')
            if verbose:
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return
                print("Diff:")
                #print "%d triples in both"%len(both)
                print("Turtle Only:")
                for t in first:
                    print(t)
                print("--------------------")
                print("NT Only")
                for t in second:
                    print(t)
                raise Exception('Graphs do not match!')
            assert isomorphic(g, res), 'graphs must be the same'
    except:
        # NOTE(review): bare except also swallows the AssertionError raised
        # when a negative-syntax test unexpectedly parses — confirm intended.
        if test.syntax:
            raise
def register_uri(self, uriref):
    """Record *uriref* and index its URL under (symbol format, image format)."""
    self._uriref_list.append(uriref)
    ref = uriref()
    # e.g. "image/png" -> "png"
    img_fmt = ref.format.split("/")[1]
    # Drop the "symbol_format_" marker from the local name.
    sym_fmt = split_uri(ref.has_symbol_format)[1].replace("symbol_format_", "")
    self[sym_fmt, img_fmt] = uriref.toPython()
def __call__(self, context):
    """Build a TreeVocabulary of all non-blank instances of ``self.classuri``,
    grouped by the namespace of each instance URI.

    :param context: (unused) Zope vocabulary-factory context
    :return: TreeVocabulary of (namespace group -> instance terms)
    """
    h = getUtility(IORDF).getHandler()
    r = h.query("select distinct ?uri ?title "
                "Where "
                "{ Graph?g "
                " { ?uri a %s . "
                " optional { ?uri <http://www.w3.org/2000/01/rdf-schema#label> ?title . }"
                " filter ( !isBlank(?uri) ) "
                " }"
                "} "
                "order by ?title" % self.classuri.n3())
    # this here would be the natural way when parsing a sparql-xml-result
    #uris = sorted([item['g'] for item in g])
    terms = defaultdict(defaultdict)
    for item in r:
        ns, local = split_uri(item[0])
        # Group key carries a human-readable title when the namespace is known.
        if ns in NAMESPACES:
            groupkey = (ns, ns, NAMESPACES[ns][1])
        else:
            groupkey = (ns, ns, ns)
        # Fall back to the URI itself when no rdfs:label was found.
        valuekey = (item[0], item[0], item[1] or item[0])
        terms[groupkey][valuekey] = {}
    return TreeVocabulary.fromDict(terms)
def get_cluster_list(self, type_=None, limit=10, offset=0, sortby='size'):
    """Yield ClusterSummary(uri, href, label, member_count) for clusters of
    *type_*, optionally restricted to the named graph ``self.__graph``.

    `label_string` / `order_by` placeholders in the query template are
    substituted per type; LIMIT/OFFSET are appended when truthy.
    """
    open_clause = close_clause = ''
    if self.__graph:
        open_clause = 'GRAPH <%s> {' % self.__graph
        close_clause = '}'
    query = """
    SELECT ?cluster ?label (COUNT(?member) AS ?memberN)
    WHERE {
      %s
      ?cluster aida:prototype ?prototype .
      ?prototype a ?type .
      label_string
      ?membership aida:cluster ?cluster ;
                  aida:clusterMember ?member .
      MINUS {?cluster aida:prototype ?member}
      %s
    }
    GROUP BY ?cluster ?label
    ORDER BY order_by
    """ % (open_clause, close_clause)
    if type_ == AIDA.Entity:
        query = query.replace('?type', type_.n3())
        query = query.replace('label_string', 'OPTIONAL {?prototype aida:hasName ?label} .')
        query = query.replace('order_by', 'DESC(?memberN)')
    if type_ == AIDA.Event or type_ == AIDA.Relation:
        query = query.replace('?type', type_.n3())
        # Events/relations take their label from the reified rdf:type statement.
        query = query.replace('label_string', '?s rdf:subject ?prototype ; rdf:predicate rdf:type ; rdf:object ?label .')
        if sortby == 'type':
            query = query.replace('order_by', '?label DESC(?memberN)')
        else:
            query = query.replace('order_by', 'DESC(?memberN) ?label')
    if limit:
        query += " LIMIT " + str(limit)
    if offset:
        query += " OFFSET " + str(offset)
    print(query)  # NOTE(review): debug leftover? confirm before removing
    results = self.__sparql.query(query, namespaces)
    result_gen = (x for x in results if x.cluster)
    for r in result_gen:
        l = r.label
        u = r.cluster
        c = r.memberN
        if isinstance(l, URIRef):
            # URI labels are shortened to their local name.
            _, l = split_uri(l)
        if 'http://www.isi.edu/gaia' in u:
            href = u.replace('http://www.isi.edu/gaia', '/cluster')
            href = href.replace('/entities', '/entities/' + self.repo)
            href = href.replace('/events', '/events/' + self.repo)
            href = href.replace('/relations', '/relations/' + self.repo)
        else:
            href = u.replace('http://www.columbia.edu', '/cluster/' + self.repo)
        if self.graph:
            # NOTE(review): reads `self.graph` while the guard at the top uses
            # the name-mangled `self.__graph` — confirm both attributes exist.
            href = href + '?g=' + self.graph
        yield ClusterSummary(u, href, l, c)
def qname(self, uri):
    """Return the prefixed (qname) form of *uri*, or *uri* unchanged when no
    prefix is bound for its namespace."""
    ns = split_uri(uri)[0]
    bound_prefix = self.ontology.g.namespace_manager.store.prefix(URIRef(ns))
    if bound_prefix is None:
        return uri
    return self.ontology.g.qname(uri)
def compute_qname(uri, revNsMap):
    """Split *uri* into (prefix, namespace, name), minting and caching a new
    "_<n>" prefix in *revNsMap* when the namespace is unknown."""
    ns, name = split_uri(uri)
    ns = URIRef(ns)
    prefix = revNsMap.get(ns)
    if prefix is None:
        prefix = "_%s" % len(revNsMap)
        revNsMap[ns] = prefix
    return prefix, ns, name
def add_control(self, control):
    """Adds a controller to a controlled transition.

    Catalysis can only be of inhibition irreversible control type. Thus the
    controllers and cofactors are added to input places of the transition.
    If the direction is reversible, the transition gets duplicated.
    """
    # Place(s) for the new controller or cofactor.
    places = self.create_places(control)
    participant_id = split_uri(control.participant)[1]
    controlled_id = split_uri(control.controlled)[1]
    modulator_place = self.net.create_place(participant_id, control.participantName)
    # Single irreversible transition named "<controlled>_<participant>".
    transition = self.net.create_transition(controlled_id + "_" + participant_id, Direction.left_to_right, controlled_id + "_IRREVERSIBLE")
    # Wire everything up.
    self.connect_catalysis(control, places)
    self.connect(transition, places, modulator_place)
def add_control(self, control):
    """Adds a controller to a controlled transition.

    Catalysis can only be of activating control type. Thus the controllers
    and cofactors are added to input places of the transition. If the
    direction is reversible, the transition gets duplicated.
    """
    # Place(s) for the new controller or cofactor.
    places = self.create_places(control)
    participant_id = split_uri(control.participant)[1]
    controlled_id = split_uri(control.controlled)[1]
    modulator_place = self.net.create_place(participant_id, control.participantName)
    # Two competitive transitions sharing the same control tag.
    base_name = controlled_id + "_" + participant_id
    control_tag = controlled_id + "_COMPETITIVE"
    transitions = [
        self.net.create_transition(base_name + "_1", Direction.left_to_right, control_tag),
        self.net.create_transition(base_name + "_2", Direction.left_to_right, control_tag),
    ]
    # Wire everything up.
    self.connect(transitions, places, modulator_place)
def _init_member(self):
    """Populate the member's label, type, link targets and freebase ids,
    preferring cached debug info over SPARQL queries."""
    query = """
    SELECT ?label ?type
    WHERE {
      OPTIONAL { ?member aida:hasName ?label }
      OPTIONAL {
        ?member aida:justifiedBy ?justification .
        ?justification skos:prefLabel ?label
      }
      ?statement rdf:subject ?member ;
                 rdf:predicate rdf:type ;
                 rdf:object ?type .
    }
    LIMIT 1
    """
    for label, type_ in self.model.sparql.query(query, namespaces, {'member': self.uri}):
        if not label:
            # No name/prefLabel found: fall back to the type's local name.
            _, label = split_uri(type_)
        self.__label = label
        self.__type = type_
    # target -> score; scores default to 0 when not from debug info.
    self.__targets = {}
    if self.__debug_info:
        if self.__debug_info['targets']:
            for i in range(0, len(self.__debug_info['targets'])):
                target = self.__debug_info['targets'][i]
                score = self.__debug_info['target_scores'][i]
                self.__targets[target] = score
    else:
        query = """
        SELECT ?target
        WHERE {
          ?member aida:link/aida:linkTarget ?target
        }
        """
        for target, in self.model.sparql.query(query, namespaces, {'member': self.uri}):
            self.__targets[str(target)] = 0
    # freebase id -> score; same fallback scheme as targets.
    self.__freebases = {}
    if self.__debug_info:
        if self.__debug_info['fbid']:
            for i in range(0, len(self.__debug_info['fbid'])):
                fbid = self.__debug_info['fbid'][i]
                score = self.__debug_info['fbid_score_avg'][i]
                self.__freebases[fbid] = score
    else:
        query = """
        SELECT DISTINCT ?fbid {
          ?member aida:privateData [
            aida:jsonContent ?fbid ;
            aida:system <http://www.rpi.edu/EDL_Freebase> ]
        }
        """
        for j_fbid, in self.model.sparql.query(query, namespaces, {'member': self.uri}):
            # Private data payload is JSON with a freebase_link mapping.
            fbids = json.loads(j_fbid).get('freebase_link').keys()
            for fbid in fbids:
                self.__freebases[fbid] = 0
def concept_code(subject: URIRef) -> str:
    """
    Return the i2b2 concept code for subject
    :param subject: URI to convert
    :return: 'ns:code' form of URI
    """
    namespace, local_code = split_uri(subject)
    return '{}:{}'.format(namespace_for(namespace).upper(), local_code)
def _render_statement(self, statement: Statement, type_prefix: Optional[str] = None) -> str:
    """Render one statement as 'pred: <object link> (Justified by ...)',
    optionally stripping *type_prefix* from the predicate's local name."""
    pred = split_uri(statement.predicate)[1]
    if type_prefix:
        pred = pred.replace(type_prefix, "")
    obj_link = self.anchor_link(statement.object)
    justified = self.render_justifications(statement.justified_by)
    return f"{pred}: {obj_link} (Justified by {justified})"
def add_edge(self, triple):
    """Add *triple* to the edge set; when an ontology is defined, warn once
    per predicate that is neither in the ontology nor in a common namespace."""
    if self.ontology_defined:
        p = triple[1]
        if p not in self.ontology_pty:
            ns = split_uri(p)[0]
            if URIRef(ns) not in common_ns:
                print("[WARNING] Property {} doesn't exist in the ontology!".format(p))
            self.ontology_pty.add(p)  # Only bark once
    self.edges.add(triple)
def create_transitions(self, conv):
    """Create the Petri-net transition(s) for a conversion; a reversible
    conversion yields one transition per direction."""
    direction = self.get_conversion_direction(conv.spontaneous, conv.direction)
    uid = split_uri(conv.interaction)[1]
    if direction != Direction.reversible:
        return [self.net.create_transition(uid, direction)]
    return [
        self.net.create_transition(uid, Direction.left_to_right),
        self.net.create_transition(uid, Direction.right_to_left),
    ]
def add_conversion(self, conv):
    """Create the participant's place and connect it to every transition of
    the conversion; the place label carries the location when known."""
    transitions = self.create_transitions(conv)
    suffix = ' (' + conv.participantLocation + ') ' if conv.participantLocation else ''
    place = self.net.create_place(split_uri(conv.participant)[1], conv.participantName + suffix)
    for t in transitions:
        self.connect(t, place, conv.relation)
def concept_name(g: Graph, subject: URIRef) -> str:
    """
    Return the i2b2 concept name for subject
    :param g: Graph - used to access label
    :param subject: concept subject
    :return: Name derived from label if it exists otherwise the URI itself
    """
    # Note - labels appear to have '.' in them as well
    fallback = split_uri(subject)[1]
    return str(g.label(subject, fallback)).replace('.', ' ')
def go(pred):
    """Append a summary row for *pred* to the enclosing result list ``r``."""
    subjs = pred["subjs"]
    objs = pred["objs"]
    r.append({
        "predicate": split_uri(pred["pred"])[1],
        "n": pred["n"],
        "unique_subj_n": subjs["uniques"],
        "unique_obj_n": objs["uniques"],
        "subj_types": print_types(subjs["types"]),
        "obj_types": print_types(objs["types"]),
    })
    return None
def get_objs_per_namespace(g, ontid, typesfilter=RDFS_TYPES + OWL_TYPES, relsfilter=RDFS_RELS + OWL_RELS):
    """
    return a dict with a dict of objects and types per namespace in graph
    :param g: Graph
    :param ontid: URI of the ontology itself (its own declaration is skipped)
    :param typesfilter: rdf:type objects that mark a subject as a declaration
    :param relsfilter: predicates whose subjects also count as declarations
    :return: Dict of { ns , Dict of {object,type} }
    """
    res = {}
    decs = set(())
    ont_ns = split_ns_uri(ontid)
    # Gather declared subjects: by rdf:type and by use of declaration predicates.
    for dec_type in typesfilter:
        for dec in g.subjects(predicate=RDF.type, object=dec_type):
            decs.add(dec)
    for decrel in relsfilter:
        for dec in g.subjects(predicate=decrel):
            decs.add(dec)
    for s in decs:
        # Skip the ontology's own declaration.
        if (s, RDF.type, OWL.Ontology) in g:
            continue
        for p, o in g.predicate_objects(s):
            # `type` shadows the builtin; holds the subject's rdf:type if seen.
            type = None
            if p == RDF.type:
                type = o
            try:
                (ns, qname) = split_uri(str(s))
            except:
                try:
                    # Retry without the trailing character (URIs ending in '/' or '#').
                    (ns, qname) = split_uri(str(s)[:-1])
                except:
                    continue  # probs a Bnode
            # if is an ontology declaration object restore full URL as namespace
            # if split_ns_uri(ns) == split_ns_uri(ont_ns):
            #     ns = str(s)
            if ns not in res:
                res[ns] = {}
            # Keep a known type; never overwrite one with None.
            if type or str(s) not in res[ns]:
                res[ns][str(s)] = type
    return res
def modifier_path(modifier: URIRef) -> str:
    """
    Convert modifier uri into an i2b2 modifier path fragment, removing the first part of the name
    Example: CodedEntry.code.text --> code\\text\\
    :param modifier: FHIR URI
    :return: i2b2 path fragment
    """
    name = split_uri(modifier)[1]
    if '.' in name:
        # Drop the leading component, then map remaining dots to backslashes.
        name = name.split('.', 1)[1].replace('.', '\\')
    return name + '\\'
def composite_uri(parent: URIRef, mod: URIRef) -> URIRef:
    """
    Return a composite URI consisting of the parent + '.' + the last element in the modifier
    :param parent: base URI
    :param mod: modifier URI
    :return: composite
    """
    last = split_uri(mod)[1]
    if '.' in last:
        last = last.rsplit('.', 1)[1]
    return URIRef(str(parent) + '.' + last)
def get(self, uriref, simplify=True):
    """Fetch all predicate/object data for *uriref* (accession strings are
    converted to URIRefs first), with a small bounded cache.

    If the subject query yields nothing, *uriref* is retried as a predicate
    and (subject, object) pairs are collected under its name instead.

    :param uriref: URIRef or accession string to look up
    :param simplify: collapse single-element lists to their element
    :return: ReferenceEntity keyed by snake_case predicate local names
    """
    if not isinstance(uriref, URIRef):
        uriref = self.accession_to_uriref(uriref)
    if uriref in self.cache:
        return self.cache[uriref]
    results = defaultdict(list)
    for subject, predicate, obj in set(self.triples((uriref, None, None))):
        predicate_name = _camel_to_snake(split_uri(predicate)[1])
        self._predicates_seen.add(predicate)
        if isinstance(obj, Literal):
            obj = obj.toPython()
        elif isinstance(obj, URIRef):
            obj = BoundURIRef(obj, source=self)
        if predicate in self.predicate_processor_map:
            # NOTE(review): this calls the map itself, not the looked-up
            # processor — presumably should be
            # self.predicate_processor_map[predicate](...); confirm.
            obj = self.predicate_processor_map(predicate, results, obj)
        if obj is not None:
            results[predicate_name].append(obj)
    # If there were no results, the query might be a predicate, so try to find all the
    # pairs that satisfy it.
    if len(results) == 0:
        predicate_name = _camel_to_snake(split_uri(uriref)[1])
        for subject, predicate, obj in set(self.triples((None, uriref, None))):
            if isinstance(obj, Literal):
                obj = obj.toPython()
            elif isinstance(obj, URIRef):
                obj = BoundURIRef(obj, source=self)
            if isinstance(subject, Literal):
                subject = subject.toPython()
            elif isinstance(subject, URIRef):
                subject = BoundURIRef(subject, source=self)
            results[predicate_name].append((subject, obj))
    if simplify:
        # Single-element lists collapse to their element.
        results = {k: v if len(v) > 1 else v[0] for k, v in results.items()}
    results = ReferenceEntity(uriref, **results)
    if len(self.cache) > self.cache_size:
        # NOTE(review): popitem() evicts an arbitrary/LIFO entry, not LRU.
        self.cache.popitem()
    self.cache[uriref] = results
    return results
def render_element(self, element: Element) -> str:
    """Render one element as a tab-separated line: type, id, statements,
    then justification/membership details."""
    element_type = element.element_type
    if element_type and "#" in element_type:
        element_type = split_uri(element_type)[1]
    # NOTE(review): the shortened id below is computed but the full
    # element.element_id is what the output uses (as in the original).
    element_id = element.element_id
    if "#" in element_id:
        element_id = split_uri(element_id)[1]
    justifications = element.informative_justifications + element.justified_by
    details = (
        f"{self.render_justifications(justifications)}\t{element.prototypes}\t"
        f"{element.members}\t{element.clusters}\t{element.names}\t"
        f"{element.handles}\t{justifications}"
    )
    statements = self.render_statements(element.statements) if element.statements else ""
    return f"{element_type}\t{element.element_id}\t{statements}\t{details}"
def img(self):
    """Return the SVG image name for this node, generating the graph image
    when no cached file exists yet."""
    import os.path
    name = split_uri(self.uri)[1]
    if os.path.isfile('static/img/' + name + '.svg'):
        return name
    from graph import SuperEdgeBasedGraph
    graph = SuperEdgeBasedGraph(self.neighborhood(), self, self.uri)
    graph.dot()  # renders the image as a side effect
    return graph.name
def rightmost_element(uri: URIRef) -> str:
    """
    Isolate the rightmost element in a URI path.
    Example: CodedEntry.code.text --> \\text\\  ;  CodedEntry --> \\
    :param uri: input URI
    :return: rightmost element in path form
    """
    name = split_uri(uri)[1]
    if '.' not in name:
        return '\\'
    return '\\' + name.rsplit('.', 1)[1] + '\\'
def get_transitions(self, control):
    """Instantiate the transition(s) realising *control*: one per enabled
    direction, or a single 'unknown' transition when no direction is set."""
    direction = self.get_direction(control)
    control_id = split_uri(control.interaction)[1]
    conversion_id = split_uri(control.controlled)[1]
    transitions = []
    if direction in (Direction.left_to_right, Direction.reversible):
        transitions.append(self.net.create_transition(conversion_id, Direction.left_to_right, control_id))
    if direction in (Direction.right_to_left, Direction.reversible):
        transitions.append(self.net.create_transition(conversion_id, Direction.right_to_left, control_id))
    if not direction:
        transitions.append(self.net.create_transition(conversion_id, Direction.unknown, control_id))
    return transitions
def ident_to_rel_type(self, identifier):
    """Map a URI *identifier* to a (possibly prefixed) relationship type
    name, minting and binding a random prefix for unknown namespaces.

    :return: UTF-8 encoded "prefix:localname" (bare local name when the
             prefix is the empty string)
    """
    namespace, rel_type = split_uri(identifier)
    prefix = self.prefix(namespace)
    if prefix is None:
        prefix = str(uuid.uuid1()).replace("-","_")
        # NOTE(review): binds the new prefix to the *local name* rather than
        # the namespace — likely should be self.bind(prefix, namespace); confirm.
        self.bind(prefix, rel_type)
    if prefix != "":
        rel_type = ":".join((prefix,rel_type))
    # NOTE(review): returns bytes on Python 3 — confirm callers expect this.
    return rel_type.encode('utf-8')
def _collect_vocab_terms(self, graph, ns):
    """Collect the sorted local names of all subjects defined in namespace
    *ns* and store them in ``self._terms_by_ns[ns]``."""
    found = set()
    # Subjects reachable via rdf:type OR rdfs:isDefinedBy (property path).
    for subject in set(graph.subjects(RDF.type | RDFS.isDefinedBy, None)):
        try:
            base, leaf = split_uri(subject)
        except:
            continue  # unsplittable (e.g. blank node)
        if base == unicode(ns) and leaf:
            found.add(leaf)
    self._terms_by_ns[ns] = sorted(found)
def _collect_vocab_terms(self, graph, ns):
    """Collect the local names of vocabulary terms defined in namespace `ns`.

    Subjects carrying rdf:type or rdfs:isDefinedBy are split into
    (namespace, leaf); leaves belonging to `ns` are stored sorted in
    self._terms_by_ns[ns].
    """
    terms = set()
    items = set(graph.subjects(RDF.type | RDFS.isDefinedBy, None))
    for subject in items:
        try:
            uri, leaf = split_uri(subject)
            if uri == unicode(ns) and leaf:
                terms.add(leaf)
        except Exception:
            # split_uri raises for unsplittable URIs; skip them.
            # Narrowed from a bare except so interrupts still propagate.
            pass
    self._terms_by_ns[ns] = sorted(terms)
def add_control(self, control):
    """Adds a controller to a controlled transition.

    Catalysis can only be of activating control type. Thus the controllers
    and cofactors are added to input places of the transition. If the
    direction is reversible, the transition gets duplicated.
    """
    # One place represents the controller/cofactor participant.
    participant_id = split_uri(control.participant)[1]
    controller_place = self.net.create_place(participant_id, control.participantName)
    # Wire the place into every transition instance of the controlled
    # conversion (two instances when the direction is reversible).
    for transition in self.get_transitions(control):
        self.connect(transition, controller_place)
def shrink(self, iri):
    """Shrink an IRI to its shortest known form.

    Returns, in order of preference: the key of a term mapped to the full
    IRI, the configured type key for rdf:type, a "prefix:name" CURIE when
    the namespace is mapped, or the IRI unchanged.
    """
    iri = unicode(iri)
    term = self._iri_map.get(iri)
    if term:
        return term.key
    if iri == RDF_TYPE:
        # NOTE: only if no term for the rdf:type IRI is defined
        return self.type_key
    try:
        ns, name = split_uri(iri)
        term = self._iri_map.get(ns)
        if term:
            return ":".join((term.key, name))
    except Exception:
        # split_uri raises when the IRI has no local part; fall through
        # and return the IRI unchanged. Narrowed from a bare except.
        pass
    return iri
def predicate2pyattr(predicate, namespace_short_notation_reverse_dict):
    """Map a predicate URI to a Python attribute name.

    Splits the predicate into (namespace, property name) and joins the
    short prefix from the reverse dict to the property name with an
    underscore. An unknown namespace is logged and the predicate returned
    unshortened; an empty short prefix yields the bare property name.
    """
    prefix, propertyname = split_uri(predicate)
    # Explicit validation instead of `assert`, which is stripped under -O.
    if not prefix or not propertyname:
        raise ValueError("Cannot split predicate %r" % predicate)
    # print ('predicate2pyattr', predicate, '-->', prefix, propertyname)
    # if not "_" in propertyname:
    #     logger.info("%s_%s may cause problems?" % (prefix, propertyname))
    if prefix not in namespace_short_notation_reverse_dict:
        logger.warning("%s cannot be shortened" % predicate)
        return predicate
    if namespace_short_notation_reverse_dict[prefix] == "":
        return propertyname
    return u"%s_%s" % (namespace_short_notation_reverse_dict[prefix], propertyname)
def trig(test):
    """Run a single W3C TriG test case.

    Positive-syntax tests must parse; negative-syntax tests must fail to
    parse. Evaluation tests (test.result set) additionally require the
    parsed graph to be isomorphic with the expected NQuads result.
    """
    g = ConjunctiveGraph()
    try:
        base = 'http://www.w3.org/2013/TriGTests/'+split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='trig')
        if not test.syntax:
            # Negative-syntax test: reaching this line means the input
            # parsed when it should not have.
            raise AssertionError("Input shouldn't have parsed!")
        if test.result: # eval test
            res = ConjunctiveGraph()
            res.parse(test.result, format='nquads')
            if verbose:
                # Verbose mode prints both serializations and the diff
                # before failing, instead of a bare assertion.
                both, first, second = graph_diff(g,res)
                if not first and not second:
                    # No one-sided triples: graphs match, test passes.
                    return
                print('===============================')
                print('TriG')
                print(g.serialize(format='nquads'))
                print('===============================')
                print('NQuads')
                print(res.serialize(format='nquads'))
                print('===============================')
                print("Diff:")
                #print "%d triples in both"%len(both)
                print("TriG Only:")
                for t in first:
                    print(t)
                print("--------------------")
                print("NQuads Only")
                for t in second:
                    print(t)
                raise Exception('Graphs do not match!')
            assert isomorphic(g, res), 'graphs must be the same'
    except:
        # Negative-syntax tests are EXPECTED to raise during parsing;
        # re-raise only when the input was supposed to be valid.
        if test.syntax:
            raise
def normalizeUri(rdfTerm, revNsMap):
    """
    Takes an RDF Term and 'normalizes' it into a QName (using the
    registered prefix) or (unlike compute_qname) the Notation 3
    form for URIs: <...URI...>

    Variables render as "?name"; terms that cannot be split, or whose
    namespace has no registered prefix in revNsMap, fall back to the
    <...> form.
    """
    try:
        namespace, name = split_uri(rdfTerm)
        namespace = URIRef(namespace)
    except Exception:
        # split_uri raises when the term has no splittable local part
        # (Variables and plain literals land here too). Narrowed from a
        # bare except so interrupts still propagate.
        if isinstance(rdfTerm, Variable):
            return "?%s" % rdfTerm
        else:
            return "<%s>" % rdfTerm
    prefix = revNsMap.get(namespace)
    if prefix is None and isinstance(rdfTerm, Variable):
        return "?%s" % rdfTerm
    elif prefix is None:
        return "<%s>" % rdfTerm
    else:
        qNameParts = compute_qname(rdfTerm, revNsMap)
        return ':'.join([qNameParts[0], qNameParts[-1]])
def ident_to_node_def(self, identifier):
    """Resolve an RDF identifier to a (node_type, node_id) pair.

    URIRefs are split into namespace and local id — at the fragment when
    one is present, otherwise via split_uri — and the namespace prefix
    becomes the node type, generating and binding a fresh prefix when none
    is registered. BNodes map to the dedicated bnode node type.
    """
    if isinstance(identifier, BNode):
        # Bnodes get their own table
        return BNODE_NODE_TYPE, identifier.encode("utf-8")
    if not isinstance(identifier, URIRef):
        raise ValueError("Unknown identifier type %r" % identifier)
    if "#" in identifier:
        # if we have a fragment, we will split there
        namespace, node_id = urldefrag(identifier)
        namespace += "#"
    else:
        # we make a best guess using split_uri logic
        namespace, node_id = split_uri(identifier)
    node_type = self.prefix(namespace)
    if node_type is None:
        node_type = str(uuid.uuid1()).replace("-", "_")
        self.bind(node_type, namespace)
    return node_type, node_id
def get_types(self, localName=False):
    """Return the rdf:type objects among this resource's triples.

    With localName=True each type URI is reduced to its local name via
    split_uri.
    """
    type_objects = [o for s, p, o in self.__triples if p == RDF.type]
    if localName:
        return [split_uri(o)[1] for o in type_objects]
    return type_objects
def __repr__(cls):
    """Represent the class as 'class <local name of its uri>'."""
    local_name = split_uri(cls.uri)[1]
    return "class " + local_name
def __repr__(self):
    """Represent the object as 'object <local name of its uri>'."""
    local_name = split_uri(self.uri)[1]
    return "object " + local_name
def export_case(self, graph_description, format_):
    """Endpoint for handling the storage of a complete case to the KR.

    Parses the provided graph, validates it (no blank nodes, no subject
    linked to several literals via one predicate, exactly one orion:Case),
    deletes any previous copy of the case, then mirrors every triple into
    neo4j as labels, node properties or relationships.

    Raises RuntimeError on any validation failure.
    """
    case_graph = rdflib.Graph()
    case_graph.parse(data=graph_description, format=format_)

    # Blank nodes cannot be mirrored to neo4j (no stable uri); reject them.
    check_blank_node_absence_query = """SELECT ?s ?p ?o WHERE {
        ?s ?p ?o .
        FILTER(isBlank(?s) || isBlank(?o))
    }
    """
    query_result = list(case_graph.query(check_blank_node_absence_query))
    if len(query_result) != 0:
        raise RuntimeError("Blank node are not handled when exporting data to the knowledge repository, but {0} were found."
                           .format(len(query_result)))

    # Each (subject, predicate) pair may carry at most one literal, since a
    # literal becomes a single node property in neo4j.
    check_unique_literal_query = """SELECT ?s ?p (count(?o) as ?count) WHERE {
        ?s ?p ?o .
        FILTER(isLiteral(?o))
    }
    GROUP BY ?s ?p
    HAVING (count(?o) > 1)
    """
    query_result = list(case_graph.query(check_unique_literal_query))
    if len(query_result) != 0:
        subject_uri = str(query_result[0][0].toPython())
        predicate = str(query_result[0][1].toPython())
        literal_count = str(query_result[0][2].toPython())
        other_uri_predicate_pair_count = str(len(query_result) - 1)
        # FIX: this message was previously a string literal broken across a
        # raw line break; rebuilt as a valid concatenation.
        raise RuntimeError("The graph must not contain an uri linked to several literals with the same predicate, but the uri "
                           + subject_uri + " is linked to " + literal_count + " literals by the predicate " + predicate
                           + ". There is " + other_uri_predicate_pair_count
                           + " other uri-predicate pair in the same case in the graph.")

    with self.open_session() as session:
        # The case is deleted from the knowledge repository to handle suppressed nodes from the database
        case_uri = case_graph.query("SELECT ?case_uri WHERE {?case_uri a orion:Case.}",
                                    initNs={"orion": rdflib.Namespace(self.orion_ns)})
        if len(case_uri) != 1:
            raise RuntimeError("There must be exactly one case in the provided graph, but {0} were found.".format(len(case_uri)))
        case_uri = list(case_uri)[0][0].toPython()
        self.delete_case(case_uri, session)

        for s, p, o in case_graph:
            if isinstance(s, rdflib.term.Literal):
                raise RuntimeError("A subject must not be a Literal")
            predicate_name = split_uri(p)[1]
            if predicate_name == "uri":
                raise RuntimeError("Can not handle triplet whose predicate name is 'uri', as it is already used for the identifier " +
                                   "property in neo4j. Triplet is :({0}, {1}, {2}).".format(s, p, o))
            if predicate_name == "type":
                if not str(o).startswith(self.orion_ns):
                    raise RuntimeError("The type of a node must be in the ontology namespace")
                label = str(o)[len(self.orion_ns):]
                # Can not use a parameter for label, as it is not supported in neo4j
                # TODO: Malicious code injection might be possible
                query = "MERGE (node {uri: $uri}) SET node :`" + label + "`"
                self.query(query, {"uri": str(s)}, session)
                continue
            if isinstance(o, rdflib.term.Literal):
                # Can not use the name of the property as a parameter, as it is not supported in neo4j
                # TODO: Malicious code injection might be possible
                query = "MERGE (node {uri: $uri}) SET node.`" + predicate_name + "` = $value"
                self.query(query, {"uri": str(s), "value": o.toPython()}, session)
                continue
            # TODO: Malicious code injection might be possible
            query = """
                MERGE (subject_node {uri: $subject_uri})
                MERGE (object_node {uri: $object_uri})
                MERGE (subject_node) -[:`""" + predicate_name + """`]-> (object_node)
            """
            self.query(query, {"subject_uri": str(s), "object_uri": str(o)}, session)
def convert(self, csvreader):
    """Convert rows from `csvreader` into RDF triples.

    Reads a header line to derive property URIs (overridable via
    self.PROPS), optionally emits class/property definition triples,
    then converts each data row into one resource with one triple per
    non-ignored, non-empty column. Progress and totals go to stderr.
    """
    start = time.time()

    if self.OUT:
        sys.stderr.write("Output to %s\n" % self.OUT.name)

    if self.IDENT != "auto" and not isinstance(self.IDENT, tuple):
        self.IDENT = (self.IDENT,)

    if not self.BASE:
        warnings.warn("No base given, using http://example.org/instances/")
        self.BASE = rdflib.Namespace("http://example.org/instances/")

    if not self.PROPBASE:
        warnings.warn(
            "No property base given, using http://example.org/property/")
        self.PROPBASE = rdflib.Namespace("http://example.org/props/")

    # skip lines at the start
    for x in range(self.SKIP):
        next(csvreader)

    # read header line
    # FIX: use the builtin next() — consistent with the SKIP loop above
    # and portable; the .next() method is Python-2-only.
    header_labels = list(next(csvreader))
    headers = dict(
        enumerate([self.PROPBASE[toProperty(x)] for x in header_labels]))

    # override header properties if some are given
    for k, v in self.PROPS.items():
        headers[k] = v
        header_labels[k] = split_uri(v)[1]

    if self.DEFINECLASS:
        # output class/property definitions
        self.triple(self.CLASS, RDF.type, RDFS.Class)
        for i in range(len(headers)):
            h, l = headers[i], header_labels[i]
            if h == "" or l == "":
                continue
            if self.COLUMNS.get(i, self.DEFAULT) == 'ignore':
                continue
            self.triple(h, RDF.type, RDF.Property)
            self.triple(h, RDFS.label, rdflib.Literal(toPropertyLabel(l)))
            self.triple(h, RDFS.domain, self.CLASS)
            self.triple(h, RDFS.range,
                        self.COLUMNS.get(i, default_node_make).range())

    rows = 0
    for l in csvreader:
        try:
            if self.IDENT == 'auto':
                # Auto mode: row number becomes the instance id.
                uri = self.BASE["%d" % rows]
            else:
                uri = self.BASE["_".join([quote(x.encode(
                    "utf8").replace(" ", "_"), safe="")
                    for x in index(l, self.IDENT)])]

            if self.LABEL:
                self.triple(uri, RDFS.label, rdflib.Literal(
                    " ".join(index(l, self.LABEL))))

            if self.CLASS:
                # type triple
                self.triple(uri, RDF.type, self.CLASS)

            for i, x in enumerate(l):
                x = x.strip()
                if x != '':
                    if self.COLUMNS.get(i, self.DEFAULT) == 'ignore':
                        continue
                    try:
                        o = self.COLUMNS.get(i, rdflib.Literal)(x)
                        if isinstance(o, list):
                            for _o in o:
                                self.triple(uri, headers[i], _o)
                        else:
                            self.triple(uri, headers[i], o)
                    except Exception as e:
                        # FIX: formerly used e.message, which only exists
                        # on Python 2; formatting the exception itself
                        # works everywhere.
                        warnings.warn(
                            "Could not process value for column " +
                            "%d:%s in row %d, ignoring: %s " % (
                                i, headers[i], rows, e))

            rows += 1
            if rows % 100000 == 0:
                sys.stderr.write(
                    "%d rows, %d triples, elapsed %.2fs.\n" % (
                        rows, self.triples, time.time() - start))
        except:
            sys.stderr.write("Error processing line: %d\n" % rows)
            raise

    # output types/labels for generated URIs
    classes = set()
    for l, x in uris.items():
        u, c = x
        self.triple(u, RDFS.label, rdflib.Literal(l))
        if c:
            c = rdflib.URIRef(c)
            classes.add(c)
            self.triple(u, RDF.type, c)

    for c in classes:
        self.triple(c, RDF.type, RDFS.Class)

    self.OUT.close()
    sys.stderr.write(
        "Converted %d rows into %d triples.\n" % (rows, self.triples))
    sys.stderr.write("Took %.2f seconds.\n" % (time.time() - start))
def _relabel_predicate(self, predicate):
    """Map a predicate to its field name, defaulting to the URI local name.

    A plain unicode string is first wrapped as a URIRef so that both the
    FIELD_NAMES lookup and split_uri see the same term type.
    """
    if type(predicate) is unicode:
        predicate = URIRef(predicate)
    fallback = split_uri(predicate)[1]
    return FIELD_NAMES.get(predicate, fallback)
def get(self, uriref, simplify=True):
    """Download all related information for `uriref` from the remote data source.

    Collects all the triples from the remote data source where `uriref`
    is the subject. If `uriref` is not the subject of any triples, it
    is re-queried as a predicate, storing the subject-object pairs.
    Any objects (and subjects) which are themselves
    :class:`rdflib.term.URIRef` instances will be converted into
    :class:`BoundURIRef` which will silently fetch the relevant entity
    from the remote source.

    If the predicate matches a processor rules, instead of it's object
    value being stored, the object will be transformed by each rule in
    the processor chain.

    Parameters
    ----------
    uriref: str or rdflib.term.URIRef
        A subject or predicate.
    simplify: bool, optional
        If true, any predicate with a single value will be a scalar,
        and any other will be a list.

    Returns
    -------
    ReferenceEntity
        An object representing the subject whose attributes are named
        after predicates with their objects as values.
    """
    # Accept plain accession strings as well as URIRefs.
    if not isinstance(uriref, URIRef):
        uriref = self.accession_to_uriref(uriref)
    # Serve from the per-source cache when possible.
    if uriref in self.cache:
        return self.cache[uriref]
    results = defaultdict(list)
    # set() deduplicates triples returned by the remote source.
    for subject, predicate, obj in set(self.triples((uriref, None, None))):
        # Predicate local name, snake_cased, becomes the attribute name.
        predicate_name = _camel_to_snake(split_uri(predicate)[1])
        self._predicates_seen.add(predicate)
        if isinstance(obj, Literal):
            obj = obj.toPython()
        elif isinstance(obj, URIRef):
            # Lazily-resolving reference back into this source.
            obj = BoundURIRef(obj, source=self)
        if predicate in self.predicate_processor_map:
            # Processor chain may transform the object, or return None to
            # drop it entirely (see the `if obj is not None` below).
            obj = self.predicate_processor_map(predicate, results, obj)
        if obj is not None:
            results[predicate_name].append(obj)
    # If there were no results, the query might be a predicate, so try to find all the
    # pairs that satisfy it.
    if len(results) == 0:
        predicate_name = _camel_to_snake(split_uri(uriref)[1])
        for subject, predicate, obj in set(self.triples((None, uriref, None))):
            if isinstance(obj, Literal):
                obj = obj.toPython()
            elif isinstance(obj, URIRef):
                obj = BoundURIRef(obj, source=self)
            if isinstance(subject, Literal):
                subject = subject.toPython()
            elif isinstance(subject, URIRef):
                subject = BoundURIRef(subject, source=self)
            # Predicate query stores (subject, object) pairs, not scalars.
            results[predicate_name].append((subject, obj))
    if simplify:
        # Collapse single-element lists to scalars.
        results = {k: v if len(v) > 1 else v[0] for k, v in results.items()}
    results = ReferenceEntity(uriref, **results)
    # NOTE(review): popitem() evicts an arbitrary entry, not LRU order —
    # confirm this bound-cache eviction policy is intended.
    if len(self.cache) > self.cache_size:
        self.cache.popitem()
    self.cache[uriref] = results
    return results