Esempio n. 1
0
    def _split_uri(self, identifier):
        """Split a URIRef into a ``(namespace, resource_id)`` pair.

        The split point is chosen from the parsed URI:

        * with a fragment: split at the fragment; the namespace keeps the
          trailing ``#``;
        * with a path that contains ``/`` and is longer than one character:
          the last path segment (the last two when the path ends in ``/``,
          so the id keeps its trailing slash) becomes the resource id and
          the namespace is rebuilt without query or fragment;
        * with any other non-empty path: the whole path is the resource id
          and the namespace is ``scheme://netloc``;
        * otherwise: the result of ``split_uri`` is used.

        :param identifier: identifier to split; must be a ``URIRef``
        :return: tuple ``(namespace, resource_id)``
        :raises ValueError: if ``identifier`` is not a ``URIRef``
        """
        if not isinstance(identifier, URIRef):
            # Wrap in a 1-tuple so a tuple identifier is formatted, not
            # unpacked as multiple %-arguments.
            raise ValueError("Unknown identifier type %r" % (identifier,))

        scheme, netloc, path, query, fragment = urlsplit(identifier)
        if query:
            # NOTE(review): every branch below reassigns both values, so this
            # result is never returned. The call is kept because split_uri
            # may still raise for such identifiers; confirm whether the
            # following ``if`` was meant to be ``elif``.
            namespace, resource_id = split_uri(identifier)
        if fragment:
            # if we have a fragment, we will split there
            namespace, resource_id = urldefrag(identifier)
            namespace += "#"
        elif "/" in path and len(path) > 1:
            splits = path.split("/")
            # A trailing slash yields an empty last segment, so take two
            # segments to keep e.g. "name/" together as the resource id.
            tail = 2 if path.endswith("/") else 1
            resource_id = "/".join(splits[-tail:])
            path = "/".join(splits[:-tail]) + "/"
            namespace = urlunsplit((scheme, netloc, path, "", ""))
        elif path:
            resource_id = path
            namespace = urlunsplit((scheme, netloc, "", "", ""))
        else:
            namespace, resource_id = split_uri(identifier)

        # Lazy %-arguments: the message is only built when DEBUG is enabled.
        log.debug("Split %s to %s, %s", identifier, namespace, resource_id)
        return namespace, resource_id
Esempio n. 2
0
    def _split_uri(self, identifier):
        """Split a URIRef into a ``(namespace, resource_id)`` pair.

        The split point is chosen from the parsed URI:

        * with a fragment: split at the fragment; the namespace keeps the
          trailing ``#``;
        * with a path that contains ``/`` and is longer than one character:
          the last path segment (the last two when the path ends in ``/``,
          so the id keeps its trailing slash) becomes the resource id and
          the namespace is rebuilt without query or fragment;
        * with any other non-empty path: the whole path is the resource id
          and the namespace is ``scheme://netloc``;
        * otherwise: the result of ``split_uri`` is used.

        :param identifier: identifier to split; must be a ``URIRef``
        :return: tuple ``(namespace, resource_id)``
        :raises ValueError: if ``identifier`` is not a ``URIRef``
        """
        if not isinstance(identifier, URIRef):
            # Wrap in a 1-tuple so a tuple identifier is formatted, not
            # unpacked as multiple %-arguments.
            raise ValueError("Unknown identifier type %r" % (identifier,))

        scheme, netloc, path, query, fragment = urlsplit(identifier)
        if query:
            # NOTE(review): every branch below reassigns both values, so this
            # result is never returned. The call is kept because split_uri
            # may still raise for such identifiers; confirm whether the
            # following ``if`` was meant to be ``elif``.
            namespace, resource_id = split_uri(identifier)
        if fragment:
            # if we have a fragment, we will split there
            namespace, resource_id = urldefrag(identifier)
            namespace += "#"
        elif "/" in path and len(path) > 1:
            splits = path.split("/")
            # A trailing slash yields an empty last segment, so take two
            # segments to keep e.g. "name/" together as the resource id.
            tail = 2 if path.endswith("/") else 1
            resource_id = "/".join(splits[-tail:])
            path = "/".join(splits[:-tail]) + "/"
            namespace = urlunsplit((scheme, netloc, path, "", ""))
        elif path:
            resource_id = path
            namespace = urlunsplit((scheme, netloc, "", "", ""))
        else:
            namespace, resource_id = split_uri(identifier)

        # Lazy %-arguments: the message is only built when DEBUG is enabled.
        log.debug("Split %s to %s, %s", identifier, namespace, resource_id)
        return namespace, resource_id
Esempio n. 3
0
	def connect_catalysis(self, control, places):
		"""
		Create a reversed copy of the controlled transition and wire it up.

		The transition named by the local part of ``control.controlled`` is
		looked up in the net. When exactly one is found, a new transition with
		the reversed direction is created, connected both ways to ``places[1]``,
		and given mirrored copies of the existing transition's arcs (arcs that
		touch the place named by ``control.controlledId`` are skipped).

		:param control: object exposing ``controlled``, ``controlledId`` and
			``participant`` URIs
		:param places: sequence of places; index 1 is connected to the new
			transition
		"""
		transitions = self.net.get_transition(split_uri(control.controlled)[1])
		if len(transitions) != 1:
			# This branch is reached for zero matches as well as several,
			# so report the actual count.
			print("Error: expected exactly 1 transition, found %d" % len(transitions))
			return

		existing_transition = transitions[0]
		new_transition = self.net.create_transition(
			existing_transition.id + "_" + split_uri(control.participant)[1],
			Direction.reverse(existing_transition.direction),
			existing_transition.control)

		# add arcs to controller
		self.connect_both_ways(new_transition, places[1])

		# NOTE(review): the original author marked the arc lookup below with
		# "Here it goes wrong..." - the failure mode is not visible from here.
		arcs = self.net.get_arcs(existing_transition)

		# Loop-invariant: local name of the controlled place.
		controlled_id = split_uri(control.controlledId)[1]

		# reverse direction and connect to new_transition
		for arc in arcs:
			if arc.source.id == existing_transition.id and arc.target.id != controlled_id:
				self.net.create_arc(arc.target, new_transition)
			if arc.target.id == existing_transition.id and arc.source.id != controlled_id:
				self.net.create_arc(new_transition, arc.source)
Esempio n. 4
0
def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
    """
    Walk the triples of one subject and mirror them onto ``nidm_obj``.

    Ordinary predicates become attributes: URL-valued objects are turned into
    qualified names when their namespace is listed in ``namespaces``, anything
    else is passed through ``get_RDFliteral_type``. A ``prov:qualifiedAssociation``
    predicate instead adds the associated agents as persons (recursing into
    their own metadata) and then records their roles.

    :param rdf_graph: RDF graph object
    :param subject_uri: URI of subject to query for additional metadata
    :param namespaces: Namespaces in NIDM document
    :param nidm_obj: NIDM object to add metadata
    :return: None

    """
    for predicate, objects in rdf_graph.predicate_objects(subject=subject_uri):
        is_association = predicate == URIRef(
            Constants.PROV['qualifiedAssociation'])

        if not is_association:
            if not validators.url(objects):
                # plain literal value
                nidm_obj.add_attributes(
                    {predicate: get_RDFliteral_type(objects)})
                continue

            # URL value: express it as a qualified name when possible
            obj_nm, obj_term = split_uri(objects)
            for uris in namespaces:
                if uris.uri == URIRef(obj_nm):
                    nidm_obj.add_attributes(
                        {predicate: pm.QualifiedName(uris, obj_term)})
            continue

        # Qualified association: register every associated agent as a person
        # and pull in that agent's own metadata.
        agents = rdf_graph.objects(
            subject=subject_uri,
            predicate=Constants.PROV['wasAssociatedWith'])
        for agent in agents:
            person = nidm_obj.add_person(uuid=agent)
            add_metadata_for_subject(rdf_graph=rdf_graph,
                                     subject_uri=agent,
                                     namespaces=namespaces,
                                     nidm_obj=person)

        # Role information hangs off the association blank nodes.
        association_nodes = rdf_graph.objects(
            subject=subject_uri,
            predicate=Constants.PROV['qualifiedAssociation'])
        for bnode in association_nodes:
            # temporary resource wrapper to query the blank node
            node_resource = Resource(rdf_graph, bnode)
            for r_obj in node_resource.objects(
                    predicate=Constants.PROV['hadRole']):
                obj_nm, obj_term = split_uri(r_obj._identifier)
                for uris in namespaces:
                    if uris.uri == URIRef(obj_nm):
                        # NOTE(review): ``person`` is whatever the agent loop
                        # assigned last; it is unbound if no agent was found.
                        nidm_obj.add_qualified_association(
                            person=person,
                            role=pm.QualifiedName(uris, obj_term))
Esempio n. 5
0
def concept_path(subject: URIRef) -> str:
    """
    Convert subject into an i2b2 concept path fragment.
    Example: Patient.status --> Patient\\status\\

    For URIs accepted by ``is_w5_uri`` only the part after the last ``.`` of
    the local name is kept; for all others every ``.`` becomes a backslash.
    The result always ends with a backslash.

    :param subject: FHIR URI
    :return: i2b2 path fragment
    """
    # Split once and reuse the local name in both branches.
    subj_path = split_uri(subject)[1]
    if is_w5_uri(subject):
        # rpartition returns the whole string as the last item when there
        # is no '.', matching the "no dot" case.
        return subj_path.rpartition('.')[2] + '\\'
    return subj_path.replace('.', '\\') + '\\'
Esempio n. 6
0
    def render_element(self, element: Element) -> str:
        """
        Render one element as an HTML fragment.

        The fragment is a ``<div>`` holding an underlined anchor (id and type
        shortened to their local names when they contain ``#``), the rendered
        justifications, the rendered statements when there are any, and a
        bullet list for whichever of names, handles, prototypes, members and
        clusters are non-empty. It is terminated by ``</div><br>``.

        :param element: element to render
        :return: the HTML lines joined with newlines
        """
        raw_type = element.element_type
        if raw_type and "#" in raw_type:
            element_type = split_uri(raw_type)[1]
        else:
            element_type = raw_type

        # The id is tested without a truthiness check, unlike the type.
        if "#" in element.element_id:
            element_id = split_uri(element.element_id)[1]
        else:
            element_id = element.element_id

        justifications = element.informative_justifications + element.justified_by
        element_anchor = f'<u><span id="{element.element_id}">{element_id} ({element_type})</span></u>'

        html_lines = [
            "<div>",
            f"{element_anchor}: {self.render_justifications(justifications)}",
        ]

        # Statements are always rendered, but only emitted when present.
        statement_list = self.render_statements(element.statements)
        if element.statements:
            html_lines.append(statement_list)

        # Collect the bullet items first; the <ul> wrapper is only needed
        # when at least one of them exists.
        bullets = []
        if element.names:
            bullets.append(f"<li>names: {', '.join(element.names)}</li>")
        if element.handles:
            bullets.append(f"<li>handles: {', '.join(element.handles)}</li>")
        if element.prototypes:
            bullets.append(
                f"<li>prototypes: {', '.join([self.anchor_link(p) for p in element.prototypes])}</li>"
            )
        if element.members:
            bullets.append(
                f"<li>members: {', '.join([self.anchor_link(p) for p in element.members])}</li>"
            )
        if element.clusters:
            bullets.append(
                f"<li>clusters: {', '.join([self.anchor_link(p) for p in element.clusters])}</li>"
            )
        if bullets:
            html_lines.append("<ul>")
            html_lines.extend(bullets)
            html_lines.append("</ul>")

        html_lines.append("</div><br>")
        return "\n".join(html_lines)
Esempio n. 7
0
def normalize_node_id(node_id, prefix_key='pos'):
    """
    Return ``node_id`` with a regulation prefix inserted before its local name.

    The id is converted to unicode first. If it already contains
    ``POSITIVE_PREFIX`` or ``NEGATIVE_PREFIX`` it is returned unchanged.
    Otherwise it is split with ``namespace.split_uri`` and rebuilt as
    ``namespace + prefix + local_name``.

    :param node_id: node identifier, assumed to be a URI of the form
        ``http://muliscale.ch/label``
    :param prefix_key: ``'neg'`` selects ``NEGATIVE_PREFIX``; any other value
        selects ``POSITIVE_PREFIX``
    :return: the normalized id as a unicode string
    """
    node_id = unicode(node_id)

    # Already normalized: do not prefix twice.
    if POSITIVE_PREFIX in node_id or NEGATIVE_PREFIX in node_id:
        return node_id

    prefix = NEGATIVE_PREFIX if prefix_key == 'neg' else POSITIVE_PREFIX

    # assume that all ids are URIs of type `http://muliscale.ch/label`
    ns, qname = namespace.split_uri(node_id)
    return ''.join([ns, prefix, qname])
Esempio n. 8
0
    def write_to_dir(self, output_file_name: str = "visualization.html"):
        """
        Write the HTML visualization and its stylesheet into ``self.output_dir``.

        Elements are grouped by type (shortened to the local name when the
        type contains ``#``); each group gets an ``<h1>`` heading followed by
        its elements sorted by ``element_id`` and rendered through
        ``render_element``. ``style.css`` is written next to the HTML file
        with the contents of ``STYLE``.

        :param output_file_name: file name of the HTML page inside
            ``self.output_dir``
        :raises ValueError: if ``self.output_dir`` exists but is not a directory
        """
        if self.output_dir.exists() and not self.output_dir.is_dir():
            raise ValueError("argument `output_dir` must be directory.")

        html_file = self.output_dir / output_file_name

        html_lines = [
            "<html>",
            '<head><link rel="stylesheet" href="style.css"></head>',
            "<body>",
        ]
        element_list_by_type = defaultdict(list)
        for element in self.elements.values():
            if element.element_type and "#" in element.element_type:
                _, element_type = split_uri(element.element_type)
            else:
                element_type = element.element_type
            element_list_by_type[element_type].append(element)

        for element_type, element_list in element_list_by_type.items():
            html_lines.append(f"<h1>{element_type}</h1>")
            for element in sorted(element_list, key=lambda e: e.element_id):
                html_lines.append(self.render_element(element))

        # Close tags in nesting order: <body> was opened inside <html>.
        html_lines.extend(["</body>", "</html>"])
        html_file.write_text("\n".join(html_lines))

        style_file = self.output_dir / "style.css"
        style_file.write_text(STYLE)
Esempio n. 9
0
    def denote_things(self, mention: Mention, annotation: Annotation):
        """
        Record that the annotated instance is denoted by ``mention``.

        Adds the triple ``(instance, gaf:denotedBy, mention)`` to
        ``self.interpretations_graph``, writes the whole graph in TriG format
        to ``annotation_<local name>.trig`` under ``self.interpretations_path``
        and returns the serialized graph.

        :param mention: mention whose ``id`` names the mention URI
        :param annotation: annotation whose ``value.id`` is the instance URI
        :return: the TriG serialization decoded as UTF-8
        :raises ValueError: if the upper-cased annotation type is not in
            ``ENTITY_ANNOTATIONS``
        """
        annotation_kind = str(annotation.type).upper()
        if annotation_kind not in ENTITY_ANNOTATIONS:
            raise ValueError(
                f"Cannot denote {annotation} of type {annotation.type}")

        # Create and bind namespaces
        # TODO this will be much easier once we have the full brain functionality
        ltalk_ns = Namespace('http://cltl.nl/leolani/talk/')
        gaf_ns = Namespace('http://groundedannotationframework.org/gaf#')
        self.interpretations_graph.bind('leolaniTalk', ltalk_ns)
        self.interpretations_graph.bind('gaf', gaf_ns)

        # Create triple
        mention_uri = ltalk_ns[mention.id]
        instance_uri = annotation.value.id
        triple = (instance_uri, gaf_ns['denotedBy'], mention_uri)
        self.interpretations_graph.add(triple)

        # Save to file but return the string representation
        os.makedirs(f'{self.interpretations_path}', exist_ok=True)
        local_name = split_uri(instance_uri)[-1]
        target = f'{self.interpretations_path}/annotation_{local_name}.trig'
        with open(target, 'wb') as f:
            self.interpretations_graph.serialize(f, format="trig")

        # TODO we return the serialized graph with the new triples. TBD if we want to
        #  a) create a new graph per annotation, VS accumulate on the same graph
        #  b) return the triples or save them
        serialized = self.interpretations_graph.serialize(format="trig")
        return serialized.decode("utf-8")
Esempio n. 10
0
    def values(self, property, namespace=None, language=None, localName=False):
        """
        Return the objects of all stored triples with the given predicate.

        :param property: predicate as a ``URIRef``, or a string that is
            resolved against ``namespace`` when given and otherwise wrapped
            in a ``URIRef`` directly
        :param namespace: optional namespace used to build the predicate
            from a non-``URIRef`` ``property``
        :param language: when truthy, keep only objects whose ``language``
            equals this value
        :param localName: when truthy, return ``Literal.value`` for literals
            and the local part of the URI (via ``split_uri``) for everything
            else
        :return: list of matching objects (or their local names/values)
        """
        from rdflib import Literal, URIRef

        if isinstance(property, URIRef):
            predicate = property
        elif namespace:
            predicate = Namespace(namespace).term(property)
        else:
            predicate = URIRef(property)

        result = [o for s, p, o in self.__triples if p == predicate]

        if language:
            # Only literals carry a language; objects without the attribute
            # (e.g. URI references) are filtered out instead of raising
            # AttributeError.
            result = [o for o in result
                      if getattr(o, 'language', None) == language]

        if localName:
            result = [x.value if isinstance(x, Literal) else split_uri(x)[1]
                      for x in result]

        return result
Esempio n. 11
0
    def __init__(self, encounterURI: URIRef, patient_id: str,
                 patient_ide_source: str) -> None:
        """
        Build the encounter mapping entries for one encounter.

        The encounter number is looked up in ``self.number_map`` under a key
        made of the resource identifier, its namespace, the project and the
        patient; when missing, a new number is generated, stored in the map
        and an active mapping entry is recorded. A second, identity entry
        keyed on the encounter number itself is recorded when that key is not
        in the map.

        :param encounterURI: URI of the encounter
        :param patient_id: Associated patient identifier
        :param patient_ide_source: Associated patient identifier source
        """
        self.encounter_mapping_entries = []

        parsed = parse_fhir_resource_uri(encounterURI)
        namespace_text = str(parsed.namespace)
        # "<local name of the resource type>/<resource>"
        ide = split_uri(parsed.resource_type)[1] + '/' + parsed.resource

        lookup_key = (ide, namespace_text, self.project_id, patient_id,
                      patient_ide_source)
        if lookup_key not in self.number_map:
            self.encounter_num = self.number_generator.new_number()
            source_entry = EncounterMapping(ide, namespace_text,
                                            self.project_id,
                                            self.encounter_num, patient_id,
                                            patient_ide_source,
                                            EncounterIDEStatus.active)
            self.number_map[lookup_key] = self.encounter_num
            self.encounter_mapping_entries.append(source_entry)
        else:
            self.encounter_num = self.number_map[lookup_key]

        # Identity mapping: the encounter number mapped onto itself.
        identity_id = str(self.encounter_num)
        identity_key = (identity_id, self.identity_source_id, self.project_id)
        if identity_key in self.number_map:
            return
        identity_entry = EncounterMapping(identity_id,
                                          self.identity_source_id,
                                          self.project_id, self.encounter_num,
                                          patient_id, patient_ide_source,
                                          EncounterIDEStatus.active)
        self.encounter_mapping_entries.append(identity_entry)
Esempio n. 12
0
def get_cluster_list(type_=None, limit=10, offset=0, sortby='size'):
    """
    Yield a ``ClusterSummary`` for every cluster returned by the SPARQL query.

    The query template is specialised by replacing its ``?type``,
    ``label_string`` and ``order_by`` placeholders when ``type_`` is
    ``AIDA.Entity`` or ``AIDA.Event``; for any other ``type_`` the
    placeholders are left in place.

    :param type_: cluster prototype type used to specialise the query
    :param limit: appended as ``LIMIT`` when truthy
    :param offset: appended as ``OFFSET`` when truthy
    :param sortby: for events, ``'type'`` orders by label first; anything
        else orders by member count first
    """
    query = """
SELECT ?cluster ?label (COUNT(?member) AS ?memberN)
WHERE {
    ?cluster aida:prototype ?prototype .
    ?prototype a ?type .
    label_string
    ?membership aida:cluster ?cluster ;
              aida:clusterMember ?member .
}
GROUP BY ?cluster ?label
ORDER BY order_by
"""
    if type_ == AIDA.Entity:
        query = (query
                 .replace('?type', type_.n3())
                 .replace('label_string', '?prototype aida:hasName ?label .')
                 .replace('order_by', 'DESC(?memberN)'))
    if type_ == AIDA.Event:
        if sortby == 'type':
            ordering = '?label DESC(?memberN)'
        else:
            ordering = 'DESC(?memberN) ?label'
        query = (query
                 .replace('?type', type_.n3())
                 .replace('label_string', '?s rdf:subject ?prototype ; rdf:predicate rdf:type ; rdf:object ?label .')
                 .replace('order_by', ordering))

    if limit:
        query = query + " LIMIT " + str(limit)
    if offset:
        query = query + " OFFSET " + str(offset)

    for cluster, label, member_count in sparql.query(query, namespaces):
        # URI labels are shown by their local name only
        if isinstance(label, URIRef):
            label = split_uri(label)[1]
        href = cluster.replace('http://www.isi.edu/gaia', '/cluster')
        href = href.replace('http://www.columbia.edu', '/cluster')
        yield ClusterSummary(cluster, href, label, member_count)
Esempio n. 13
0
	def create_places(self, control):
		"""
		Create the two catalysis places for a control.

		The first place is named after the local part of
		``control.controlledId``; the second uses that name with a
		``_COMPETITIVE`` suffix and is labelled with the controlled name
		(empty when it is ``None``) followed by ``*``.

		:return: list with the two places, in that order
		"""
		base_id = split_uri(control.controlledId)[1]
		plain_place = self.net.create_place(base_id)

		label = control.controlledName
		if label is None:
			label = ""

		competitive_place = self.net.create_place(
			split_uri(control.controlledId)[1] + "_COMPETITIVE", label + "*")
		return [plain_place, competitive_place]
Esempio n. 14
0
def turtle(test):
    """
    Run a single Turtle parser test case.

    ``test.action`` is parsed as Turtle with a base derived from its local
    name under ``http://www.w3.org/2013/TurtleTests/``.

    * Negative syntax test (``test.syntax`` false): parsing must fail; if the
      input parses, an ``AssertionError`` is raised.
    * Positive test: any parse error propagates. When ``test.result`` is set
      it is parsed as N-Triples and must be isomorphic to the parsed graph;
      with the module-level ``verbose`` flag set, the differing triples are
      printed before failing.

    :param test: test case exposing ``action``, ``syntax`` and ``result``
    """
    g = Graph()

    try:
        base = 'http://www.w3.org/2013/TurtleTests/'+split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='turtle')
    except Exception:
        if test.syntax:
            raise
        # Negative syntax test: failing to parse is the expected outcome.
        return

    # Raised outside the try block so the "expected failure" handler above
    # cannot swallow it.
    if not test.syntax:
        raise AssertionError("Input shouldn't have parsed!")

    if test.result: # eval test
        res = Graph()
        res.parse(test.result, format='nt')

        if verbose:
            both, first, second = graph_diff(g,res)
            if not first and not second: return
            # Parenthesised single-argument prints behave the same as the
            # print statement and also work as a function call.
            print("Diff:")
            #print "%d triples in both"%len(both)
            print("Turtle Only:")
            for t in first:
                print(t)

            print("--------------------")
            print("NT Only")
            for t in second:
                print(t)
            raise Exception('Graphs do not match!')

        assert isomorphic(g, res), 'graphs must be the same'
    def generate_bio2vec_frmt(self, model, node_dict, outdir):
        """
        Write the model's embeddings to ``embeddings.bio2vec.tsv`` in ``outdir``.

        One tab-separated row is written per vocabulary key that is numeric
        and present in the inverted ``node_dict``: node, local name, two empty
        columns, entity type and the comma-joined embedding vector. Other keys
        are reported on stdout and skipped.

        :param model: embedding model; ``model.wv.vocab`` is iterated and
            ``model[key]`` supplies the vector
        :param node_dict: mapping whose values are the numeric ids used as
            vocabulary keys
        :param outdir: directory the output file is written to
        """
        logger.info("Started generating bio2vec dataset file")
        id2node_map = {v: k for k, v in node_dict.items()}

        entity_emb = model
        sep = ','
        outFile = join(outdir, "embeddings.bio2vec.tsv")
        # newline='' hands line-ending control to the csv writer, as the csv
        # module documentation requires.
        with open(outFile, 'w', newline='') as out:
            writer = csv.writer(out, delimiter='\t')

            for key in entity_emb.wv.vocab:
                if not key.isdigit() or int(key) not in id2node_map:
                    print("key not found:", key)
                    continue

                node = id2node_map[int(key)]
                local_name = ''
                entity_type = 'entity'
                try:
                    # drop the first and last character of the node text
                    # (presumably enclosing angle brackets - TODO confirm)
                    uri = node[1:-1]
                    entity_type = find_type(uri)
                    node = uri
                    local_name = split_uri(uri)[1]
                except Exception:
                    # Best effort: keep whatever was resolved before the
                    # failure and fall back to the defaults for the rest.
                    pass

                row = [node, local_name, '', '', entity_type,
                       sep.join(map(str, entity_emb[key]))]
                writer.writerow(row)

        logger.info("Finished generating bio2vec dataset file")
    def get_class(self, class_uri):
        """
        Build a Python class object for the class identified by ``class_uri``.

        Base classes are resolved recursively; ``Thing`` is used when the
        query reports none. When the URI is not a known class, an attempt is
        made to create it and the lookup is repeated.

        :param class_uri: URI of the class (wrapped in ``URIRef``)
        :return: the created class, or ``None`` when the class does not exist
            and could not be created
        """
        class_uri = URIRef(class_uri)

        if not self.query.is_class(class_uri):
            # no such class? create it!
            if self.query.create_class(class_uri):
                return self.get_class(class_uri)
            return None

        parent_uris = self.query.get_base_classes(class_uri)
        if parent_uris:
            bases = [self.get_class(parent) for parent in parent_uris]
        else:
            bases = [Thing]

        # only the local part of the URI is used as the class name
        classname = split_uri(unicode(class_uri))[1]
        attributes = {
            "uri": class_uri,
            "factory": self,
        }
        return ClassCreator(str(classname), tuple(bases), attributes)
Esempio n. 17
0
 def entity_relations(self):
     """
     Yield ``(relation_type, obj, label)`` for each row of the relation query.

     The query is run through ``self.model.sparql`` with ``obj`` bound to
     ``self.uri``; the relation type is reduced to its local name.

     NOTE(review): the query text uses ``groupby`` rather than ``GROUP BY``;
     confirm the endpoint accepts it.
     """
     query = """
     SELECT ?relation ?pred2 ?obj2 ?relation_type (min(?lbl) as ?label)
     WHERE {
         ?relation a aida:Relation .
         ?s1 rdf:subject ?relation ;
                     rdf:predicate ?pred ;
                     rdf:object ?obj .
         ?s2 rdf:subject ?relation ;
                     rdf:predicate rdf:type ;
                     rdf:object ?relation_type .
         ?s3 rdf:subject ?relation ;
                     rdf:predicate ?pred2 ;
                     rdf:object ?obj2 .
         OPTIONAL {?obj2 aida:hasName ?lbl}
         filter(?s3 != ?s2 && ?s3 != ?s1)
     }
     groupby ?relation ?pred2 ?obj2 ?relation_type
       """
     rows = self.model.sparql.query(query, namespaces, {'obj': self.uri})
     for relation, pred, obj, relation_type, label in rows:
         relation_type = split_uri(relation_type)[1]
         # The predicate is trimmed to the text after its first '_' but is
         # not part of the yielded tuple.
         pred = pred[pred.find('_') + 1:]
         yield relation_type, obj, label
Esempio n. 18
0
    def context_resolve(self, field_uri: str) -> str:
        """
        Register the context needed for ``field_uri`` and return a name for it.

        The knowledge graph's ``@context`` is created when missing. The
        result is, in order of preference: the bare local name when the URI's
        namespace equals ``@vocab``; the bare local name when the schema has
        such a field (mapping it to the URI in the context if not yet
        present); the prefixed name when the namespace has a prefix (the
        prefix is added to the context); otherwise the URI itself.

        :param field_uri: full URI of the field
        :return: a field_name that can be resolved with kg's @context
        """
        from rdflib.namespace import split_uri

        context = self._kg.get("@context", dict())
        self._kg["@context"] = context
        nm = self.ontology.g.namespace_manager
        space, name = split_uri(field_uri)

        if "@vocab" not in context and None in nm.namespaces():
            context["@vocab"] = nm.store.prefix(space)

        # case #1, can directly use name
        uses_vocab = "@vocab" in context and space == context["@vocab"]
        if uses_vocab:
            return name

        if self.schema.has_field(name):
            context.setdefault(name, field_uri)
            return name

        prefix = nm.store.prefix(space)
        if not prefix:
            return field_uri
        context[prefix] = space
        return nm.qname(field_uri)
Esempio n. 19
0
    def _get_rdf_identified(self, graph, identity):
        """
        Collect the identified-object fields and annotations for ``identity``.

        :param graph: RDF graph to read from
        :param identity: subject whose triples are read
        :return: dict with ``identity``, ``display_id``, ``was_derived_from``,
            ``version``, ``description``, ``name`` and ``annotations`` (one
            ``Annotation`` per triple whose ``prefix:name`` predicate is not
            in ``VALID_ENTITIES``)
        """
        # Exact type check on purpose: only plain ``str`` is passed through,
        # anything else is converted with ``toPython()``.
        identity_value = identity.toPython() if type(identity) is not str else identity
        c = {
            'identity': identity_value,
            'display_id': self._get_triplet_value(graph, identity, SBOL.displayId),
            'was_derived_from': self._get_triplet_value(graph, identity, PROV.wasDerivedFrom),
            'version': self._get_triplet_value(graph, identity, SBOL.version),
            'description': self._get_triplet_value(graph, identity, DCTERMS.description),
            'name': self._get_triplet_value(graph, identity, DCTERMS.title),
        }

        # namespace URI -> prefix
        prefix_by_namespace = dict(
            (uri, prefix) for prefix, uri in self._namespaces.items())

        # Get annotations (non top level)
        annotations = c['annotations'] = []
        for _subject, predicate, target in graph.triples((identity, None, None)):
            namespace, obj = split_uri(predicate)
            prefix = prefix_by_namespace[namespace]
            if '{}:{}'.format(prefix, obj) in VALID_ENTITIES:
                continue

            q_name = QName(namespace=namespace, local_name=obj, prefix=prefix)
            if isinstance(target, URIRef):
                value = AnnotationValue(uri=target.toPython())
            elif isinstance(target, Literal):
                value = AnnotationValue(literal=target.toPython())
            else:
                value = None
            annotations.append(Annotation(q_name=q_name, annotation_value=value))
        return c
def turtle(test):
    """
    Run a single Turtle parser test case.

    ``test.action`` is parsed as Turtle with a base derived from its local
    name under ``http://www.w3.org/2013/TurtleTests/``.

    * Negative syntax test (``test.syntax`` false): parsing must fail; if the
      input parses, an ``AssertionError`` is raised.
    * Positive test: any parse error propagates. When ``test.result`` is set
      it is parsed as N-Triples and must be isomorphic to the parsed graph;
      with the module-level ``verbose`` flag set, the differing triples are
      printed before failing.

    :param test: test case exposing ``action``, ``syntax`` and ``result``
    """
    g = Graph()

    try:
        base = 'http://www.w3.org/2013/TurtleTests/'+split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='turtle')
    except Exception:
        if test.syntax:
            raise
        # Negative syntax test: failing to parse is the expected outcome.
        return

    # Raised outside the try block so the "expected failure" handler above
    # cannot swallow it.
    if not test.syntax:
        raise AssertionError("Input shouldn't have parsed!")

    if test.result: # eval test
        res = Graph()
        res.parse(test.result, format='nt')

        if verbose:
            both, first, second = graph_diff(g,res)
            if not first and not second: return
            print("Diff:")
            #print "%d triples in both"%len(both)
            print("Turtle Only:")
            for t in first:
                print(t)

            print("--------------------")
            print("NT Only")
            for t in second:
                print(t)
            raise Exception('Graphs do not match!')

        assert isomorphic(g, res), 'graphs must be the same'
Esempio n. 21
0
 def register_uri(self, uriref):
     """
     Remember ``uriref`` and index its URL by symbol and image format.

     The reference is obtained by calling ``uriref``. The image format is the
     second ``/``-separated part of its ``format``; the symbol format is the
     local name of ``has_symbol_format`` with ``symbol_format_`` removed.
     """
     self._uriref_list.append(uriref)
     target = uriref()
     image_format = target.format.split("/")[1]
     symbol_local_name = split_uri(target.has_symbol_format)[1]
     symbol_format = symbol_local_name.replace("symbol_format_", "")
     self[symbol_format, image_format] = uriref.toPython()
Esempio n. 22
0
    def __call__(self, context):
        """
        Build a tree vocabulary of all non-blank resources of ``self.classuri``.

        Resources are grouped by the namespace part of their URI; the group
        title comes from ``NAMESPACES`` when the namespace is listed there and
        is the namespace itself otherwise. Each term is titled with its label
        when the query returned one, else with its URI.

        :param context: not used by this method
        :return: ``TreeVocabulary`` built from the grouped terms
        """
        handler = getUtility(IORDF).getHandler()
        sparql = ("select distinct ?uri ?title "
                  "Where "
                  "{ Graph?g "
                  "  { ?uri a %s . "
                  "    optional { ?uri <http://www.w3.org/2000/01/rdf-schema#label> ?title . }"
                  "    filter ( !isBlank(?uri) ) "
                  "  }"
                  "} "
                  "order by ?title"
                  % self.classuri.n3())
        rows = handler.query(sparql)
        # this here would be the natural way when parsing a sparql-xml-result
        #uris = sorted([item['g'] for item in g])

        terms = defaultdict(defaultdict)
        for row in rows:
            uri, title = row[0], row[1]
            ns = split_uri(uri)[0]
            group_title = NAMESPACES[ns][1] if ns in NAMESPACES else ns
            groupkey = (ns, ns, group_title)
            valuekey = (uri, uri, title or uri)
            terms[groupkey][valuekey] = {}

        return TreeVocabulary.fromDict(terms)
Esempio n. 23
0
    def get_cluster_list(self, type_=None, limit=10, offset=0, sortby='size'):
        """
        Yield a ``ClusterSummary`` for every cluster returned by the query.

        The query is wrapped in a ``GRAPH`` clause when a graph is configured
        and specialised by replacing its ``?type``, ``label_string`` and
        ``order_by`` placeholders for entity, event and relation types; for
        any other ``type_`` the placeholders stay in place. The final query
        is printed before it is run. Rows without a cluster are skipped.

        :param type_: cluster prototype type used to specialise the query
        :param limit: appended as ``LIMIT`` when truthy
        :param offset: appended as ``OFFSET`` when truthy
        :param sortby: for events/relations, ``'type'`` orders by label
            first; anything else orders by member count first
        """
        if self.__graph:
            open_clause = 'GRAPH <%s> {' % self.__graph
            close_clause = '}'
        else:
            open_clause = close_clause = ''
        query = """
    SELECT ?cluster ?label (COUNT(?member) AS ?memberN)
    WHERE {
        %s
        ?cluster aida:prototype ?prototype .
        ?prototype a ?type .
        label_string
        ?membership aida:cluster ?cluster ;
                  aida:clusterMember ?member .
        MINUS {?cluster aida:prototype ?member}
        %s
    }
    GROUP BY ?cluster ?label
    ORDER BY order_by
    """ % (open_clause, close_clause)

        if type_ == AIDA.Entity:
            query = (query
                     .replace('?type', type_.n3())
                     .replace('label_string',
                              'OPTIONAL {?prototype aida:hasName ?label} .')
                     .replace('order_by', 'DESC(?memberN)'))
        if type_ == AIDA.Event or type_ == AIDA.Relation:
            if sortby == 'type':
                ordering = '?label DESC(?memberN)'
            else:
                ordering = 'DESC(?memberN) ?label'
            query = (query
                     .replace('?type', type_.n3())
                     .replace('label_string',
                              '?s rdf:subject ?prototype ; rdf:predicate rdf:type ; rdf:object ?label .')
                     .replace('order_by', ordering))

        if limit:
            query = query + " LIMIT " + str(limit)
        if offset:
            query = query + " OFFSET " + str(offset)
        print(query)

        for row in self.__sparql.query(query, namespaces):
            if not row.cluster:
                continue
            label = row.label
            cluster = row.cluster
            member_count = row.memberN

            # URI labels are shown by their local name only
            if isinstance(label, URIRef):
                label = split_uri(label)[1]

            if 'http://www.isi.edu/gaia' in cluster:
                href = cluster.replace('http://www.isi.edu/gaia', '/cluster')
                # insert the repository name after each collection segment
                for segment in ('/entities', '/events', '/relations'):
                    href = href.replace(segment, segment + '/' + self.repo)
            else:
                href = cluster.replace('http://www.columbia.edu',
                                       '/cluster/' + self.repo)
            if self.graph:
                href = href + '?g=' + self.graph
            yield ClusterSummary(cluster, href, label, member_count)
Esempio n. 24
0
 def qname(self, uri):
     """
     Return the prefixed name of ``uri``, or ``uri`` itself when the
     ontology graph's store has no prefix for its namespace.
     """
     namespace = split_uri(uri)[0]
     store = self.ontology.g.namespace_manager.store
     if store.prefix(URIRef(namespace)) is None:
         return uri
     return self.ontology.g.qname(uri)
Esempio n. 25
0
def compute_qname(uri, revNsMap):
    """
    Split ``uri`` and return ``(prefix, namespace, name)``.

    The prefix is looked up in ``revNsMap`` (namespace -> prefix). When the
    namespace is unknown, a prefix ``_<current map size>`` is generated and
    stored in ``revNsMap``.
    """
    ns_text, local_name = split_uri(uri)
    ns_ref = URIRef(ns_text)
    known_prefix = revNsMap.get(ns_ref)
    if known_prefix is not None:
        return (known_prefix, ns_ref, local_name)

    generated = "_%s" % len(revNsMap)
    revNsMap[ns_ref] = generated
    return (generated, ns_ref, local_name)
Esempio n. 26
0
def compute_qname(uri, revNsMap):
    """Return ``(prefix, namespace, name)`` for ``uri``.

    The namespace/name pair comes from ``split_uri``; the prefix is read from
    ``revNsMap`` (keyed by the namespace as a ``URIRef``).  A missing prefix
    is created as ``"_"`` followed by the number of entries currently in
    ``revNsMap`` and is recorded in that mapping.
    """
    ns_text, local_name = split_uri(uri)
    ns_ref = URIRef(ns_text)
    found = revNsMap.get(ns_ref)
    if found is None:
        # First time this namespace is seen: invent and remember a prefix
        found = "_%s" % len(revNsMap)
        revNsMap[ns_ref] = found
    return (found, ns_ref, local_name)
Esempio n. 27
0
	def add_control(self, control):
		"""Add an irreversible control to the net.

		Creates the places for ``control`` plus a place for its participant,
		then a single left-to-right transition named
		``<controlled>_<participant>`` whose third argument is
		``<controlled>_IRREVERSIBLE``.  Finally the catalysis arcs and the
		arcs towards the participant place are connected.
		"""
		# Places for the control, then the place of the participating modulator
		places = self.create_places(control)
		participant_id = split_uri(control.participant)[1]
		modulator_place = self.net.create_place(participant_id, control.participantName)

		# The one transition needed for this control
		controlled_id = split_uri(control.controlled)[1]
		transition = self.net.create_transition(
			controlled_id + "_" + participant_id,
			Direction.left_to_right,
			controlled_id + "_IRREVERSIBLE"
		)

		# Arcs
		self.connect_catalysis(control, places)
		self.connect(transition, places, modulator_place)
Esempio n. 28
0
	def add_control(self, control):
		"""Add a competitive control to the net.

		Creates the places for ``control`` plus a place for its participant,
		then two left-to-right transitions named
		``<controlled>_<participant>_1`` and ``<controlled>_<participant>_2``,
		both created with ``<controlled>_COMPETITIVE`` as third argument, and
		connects them to the places.
		"""
		# Places for the control, then the place of the participating modulator
		places = self.create_places(control)
		participant_id = split_uri(control.participant)[1]
		modulator_place = self.net.create_place(participant_id, control.participantName)

		# Both transitions share everything except the numeric suffix
		controlled_id = split_uri(control.controlled)[1]
		transitions = [
			self.net.create_transition(
				controlled_id + "_" + participant_id + suffix,
				Direction.left_to_right,
				controlled_id + "_COMPETITIVE"
			)
			for suffix in ("_1", "_2")
		]

		# Arcs
		self.connect(transitions, places, modulator_place)
Esempio n. 29
0
    def _init_member(self):
        query = """
SELECT ?label ?type
WHERE {
  OPTIONAL { ?member aida:hasName ?label }
  OPTIONAL { ?member aida:justifiedBy ?justification .
    ?justification skos:prefLabel ?label }
  ?statement rdf:subject ?member ;
             rdf:predicate rdf:type ;
             rdf:object ?type .
}
LIMIT 1 """
        for label, type_ in self.model.sparql.query(query, namespaces,
                                                    {'member': self.uri}):
            if not label:
                _, label = split_uri(type_)
            self.__label = label
            self.__type = type_

        self.__targets = {}
        if self.__debug_info:
            if self.__debug_info['targets']:
                for i in range(0, len(self.__debug_info['targets'])):
                    target = self.__debug_info['targets'][i]
                    score = self.__debug_info['target_scores'][i]
                    self.__targets[target] = score
        else:
            query = """
                SELECT ?target
                WHERE {
                  ?member aida:link/aida:linkTarget ?target 
                } """
            for target, in self.model.sparql.query(query, namespaces,
                                                   {'member': self.uri}):
                self.__targets[str(target)] = 0

        self.__freebases = {}
        if self.__debug_info:
            if self.__debug_info['fbid']:
                for i in range(0, len(self.__debug_info['fbid'])):
                    fbid = self.__debug_info['fbid'][i]
                    score = self.__debug_info['fbid_score_avg'][i]
                    self.__freebases[fbid] = score
        else:
            query = """
                SELECT DISTINCT ?fbid {
                   ?member aida:privateData [
                        aida:jsonContent ?fbid ;
                        aida:system <http://www.rpi.edu/EDL_Freebase>
                    ]
                }
            """

            for j_fbid, in self.model.sparql.query(query, namespaces,
                                                   {'member': self.uri}):
                fbids = json.loads(j_fbid).get('freebase_link').keys()
                for fbid in fbids:
                    self.__freebases[fbid] = 0
Esempio n. 30
0
def concept_code(subject: URIRef) -> str:
    """
    Build the i2b2 concept code for a subject URI
    :param subject: URI to convert
    :return: 'NS:code' string, where NS is the upper-cased result of namespace_for
    """
    ns, code = split_uri(subject)
    ns_label = namespace_for(ns).upper()
    return '{}:{}'.format(ns_label, code)
Esempio n. 31
0
    def _render_statement(self,
                          statement: Statement,
                          type_prefix: Optional[str] = None) -> str:
        """Render one statement as ``<predicate>: <object> (Justified by ...)``.

        The predicate is reduced to the local part returned by ``split_uri``;
        when ``type_prefix`` is given, every occurrence of it is removed from
        that local part.  The object is rendered with ``anchor_link`` and the
        justifications with ``render_justifications``.
        """
        pred = split_uri(statement.predicate)[1]
        if type_prefix:
            pred = pred.replace(type_prefix, "")

        target = self.anchor_link(statement.object)
        justified = self.render_justifications(statement.justified_by)
        return f"{pred}: {target} (Justified by {justified})"
Esempio n. 32
0
 def add_edge(self, triple):
     """Add ``triple`` to ``self.edges``, warning about unknown properties.

     The check only runs when ``self.ontology_defined`` is truthy.  A
     predicate that is not in ``self.ontology_pty`` and whose namespace (per
     ``split_uri``) is not in ``common_ns`` triggers a printed warning and is
     then remembered in ``self.ontology_pty`` so it is reported only once.
     """
     if self.ontology_defined:
         _, predicate, _ = triple
         unseen = predicate not in self.ontology_pty
         # split_uri is only reached for predicates not seen before
         if unseen and URIRef(split_uri(predicate)[0]) not in common_ns:
             print("[WARNING] Property {} doesn't exist in the ontology!".format(predicate))
             self.ontology_pty.add(predicate)  # Only bark once
     self.edges.add(triple)
Esempio n. 33
0
	def create_transitions(self, conv):
		"""Create the transition(s) for a conversion and return them as a list.

		The direction is obtained from ``get_conversion_direction``.  A
		reversible conversion yields two transitions (left-to-right, then
		right-to-left); any other direction yields a single transition with
		that direction.  All transitions are named after the local part of
		``conv.interaction``.
		"""
		direction = self.get_conversion_direction(conv.spontaneous, conv.direction)
		uid = split_uri(conv.interaction)[1]

		if direction == Direction.reversible:
			wanted = [Direction.left_to_right, Direction.right_to_left]
		else:
			wanted = [direction]
		return [self.net.create_transition(uid, each) for each in wanted]
Esempio n. 34
0
	def add_conversion(self, conv):
		"""Create the transitions and participant place for ``conv`` and connect them.

		The place is named after the local part of ``conv.participant``; its
		second argument is the participant name, followed by the location in
		parentheses when ``conv.participantLocation`` is set.
		"""
		transitions = self.create_transitions(conv)

		if conv.participantLocation:
			location_suffix = ' (' + conv.participantLocation + ') '
		else:
			location_suffix = ''
		participant_id = split_uri(conv.participant)[1]
		place = self.net.create_place(participant_id, conv.participantName + location_suffix)

		for transition in transitions:
			self.connect(transition, place, conv.relation)
Esempio n. 35
0
def concept_name(g: Graph, subject: URIRef) -> str:
    """
    Build the i2b2 concept name for subject
    :param g: Graph - queried for the label of subject
    :param subject: concept subject
    :return: result of g.label with the local part of the URI as default, with '.' replaced by spaces
    """
    fallback = split_uri(subject)[1]
    label = g.label(subject, fallback)
    # Labels appear to contain '.' as well, so the replacement applies to both
    return str(label).replace('.', ' ')
Esempio n. 36
0
 def go(pred):
     """Append a summary row for ``pred`` to the outer ``r`` and return None.

     The row holds the local part of ``pred["pred"]``, the count ``n``, the
     unique subject/object counts and the subject/object types formatted by
     ``print_types``.
     """
     row = {}
     row["predicate"] = split_uri(pred["pred"])[1]
     row["n"] = pred["n"]
     subjects = pred["subjs"]
     row["unique_subj_n"] = subjects["uniques"]
     objects = pred["objs"]
     row["unique_obj_n"] = objects["uniques"]
     row["subj_types"] = print_types(subjects["types"])
     row["obj_types"] = print_types(objects["types"])
     r.append(row)
     return None
Esempio n. 37
0
def get_objs_per_namespace(g,
                           ontid,
                           typesfilter=RDFS_TYPES + OWL_TYPES,
                           relsfilter=RDFS_RELS + OWL_RELS):
    """Group the declared subjects of a graph by namespace.

    Subjects are collected when they have an ``rdf:type`` listed in
    ``typesfilter`` or when they are the subject of a predicate listed in
    ``relsfilter``.  Subjects typed ``owl:Ontology`` are skipped, as are
    subjects that ``split_uri`` cannot split even after dropping their last
    character.

    :param g: Graph
    :param ontid: ontology identifier; only passed to ``split_ns_uri``
    :param typesfilter: rdf:type values that mark a subject as a declaration
    :param relsfilter: predicates that mark a subject as a declaration
    :return: Dict of { ns , Dict of {object, type} } where type is an
        ``rdf:type`` object of the subject or None when it has none
    """
    res = {}
    decs = set()

    # Result is unused at present; the call is kept so an unusable ``ontid``
    # is still reported exactly as before.
    ont_ns = split_ns_uri(ontid)

    for dec_type in typesfilter:
        decs.update(g.subjects(predicate=RDF.type, object=dec_type))
    for decrel in relsfilter:
        decs.update(g.subjects(predicate=decrel))

    for s in decs:
        if (s, RDF.type, OWL.Ontology) in g:
            continue

        # The namespace depends on the subject only, so it is computed once
        # per subject instead of once per predicate/object pair.
        subject_key = str(s)
        try:
            ns = split_uri(subject_key)[0]
        except Exception:
            try:
                # Retry without the trailing character
                ns = split_uri(subject_key[:-1])[0]
            except Exception:
                continue  # probably a BNode

        for p, o in g.predicate_objects(s):
            rdf_type = o if p == RDF.type else None
            bucket = res.setdefault(ns, {})
            # A type always overwrites; a missing type only fills a gap
            if rdf_type or subject_key not in bucket:
                bucket[subject_key] = rdf_type
    return res
Esempio n. 38
0
def modifier_path(modifier: URIRef) -> str:
    r"""
    Turn a modifier uri into an i2b2 modifier path fragment, dropping the first dotted part of the name
    Example: CodedEntry.code.text --> code\text\
    :param modifier: FHIR URI
    :return: i2b2 path fragment, always ending in a backslash
    """
    local_name = split_uri(modifier)[1]
    _, dot, remainder = local_name.partition('.')
    if dot:
        fragment = remainder.replace('.', '\\')
    else:
        # No dotted components: the whole name is the fragment
        fragment = local_name
    return fragment + '\\'
Esempio n. 39
0
def composite_uri(parent: URIRef, mod: URIRef) -> URIRef:
    """
    Build a composite URI: the parent + '.' + the last dotted element of the modifier's local name
    :param parent: base URI
    :param mod: modifier URI
    :return: composite
    """
    local_name = split_uri(mod)[1]
    # rpartition yields the whole name as last element when there is no '.'
    last_element = local_name.rpartition('.')[2]
    return URIRef(str(parent) + '.' + last_element)
Esempio n. 40
0
    def get(self, uriref, simplify=True):
        """Collect everything known about ``uriref`` into a ``ReferenceEntity``.

        All triples with ``uriref`` as subject are gathered and grouped by the
        snake-cased local name of their predicate.  If there are none,
        ``uriref`` is queried as a predicate instead and the
        ``(subject, object)`` pairs are stored under its own name.  Literals
        are converted with ``toPython`` and ``URIRef`` values are wrapped in
        ``BoundURIRef``.

        Parameters
        ----------
        uriref: rdflib.term.URIRef or other
            Anything that is not a ``URIRef`` is first converted with
            ``accession_to_uriref``.
        simplify: bool, optional
            If true, a predicate with a single value is stored as a scalar
            instead of a one-element list.

        Returns
        -------
        ReferenceEntity
            Built from ``uriref`` and the collected values; also stored in
            ``self.cache``.
        """
        if not isinstance(uriref, URIRef):
            uriref = self.accession_to_uriref(uriref)
        if uriref in self.cache:
            return self.cache[uriref]
        results = defaultdict(list)
        for subject, predicate, obj in set(self.triples((uriref, None, None))):
            predicate_name = _camel_to_snake(split_uri(predicate)[1])
            self._predicates_seen.add(predicate)
            if isinstance(obj, Literal):
                obj = obj.toPython()
            elif isinstance(obj, URIRef):
                obj = BoundURIRef(obj, source=self)
            if predicate in self.predicate_processor_map:
                obj = self.predicate_processor_map(predicate, results, obj)
            if obj is not None:
                results[predicate_name].append(obj)

        # If there were no results, the query might be a predicate, so try to find all the
        # pairs that satisfy it.
        if not results:
            predicate_name = _camel_to_snake(split_uri(uriref)[1])
            for subject, predicate, obj in set(self.triples((None, uriref, None))):
                if isinstance(obj, Literal):
                    obj = obj.toPython()
                elif isinstance(obj, URIRef):
                    obj = BoundURIRef(obj, source=self)

                if isinstance(subject, Literal):
                    subject = subject.toPython()
                elif isinstance(subject, URIRef):
                    subject = BoundURIRef(subject, source=self)

                results[predicate_name].append((subject, obj))

        if simplify:
            results = {k: v if len(v) > 1 else v[0] for k, v in results.items()}
        results = ReferenceEntity(uriref, **results)
        # Evict one entry when over capacity, then always store the new
        # result.  The store used to sit inside the eviction branch, so
        # nothing was cached until the cache was already over its size limit.
        # NOTE(review): the cache key ignores ``simplify`` - confirm callers
        # never mix both modes for the same uriref.
        if len(self.cache) > self.cache_size:
            self.cache.popitem()
        self.cache[uriref] = results
        return results
Esempio n. 41
0
    def render_element(self, element: Element) -> str:
        """Render ``element`` as a single tab-separated line.

        The line starts with the element type (reduced to the local part
        returned by ``split_uri`` when it contains ``#``), the element id and
        the rendered statements, followed by the rendered justifications,
        prototypes, members, clusters, names, handles and the raw
        justifications.
        """
        raw_type = element.element_type
        if raw_type and "#" in raw_type:
            element_type = split_uri(raw_type)[1]
        else:
            element_type = raw_type

        # NOTE(review): the shortened id computed here is never used - the
        # returned line carries the full ``element.element_id``.  Confirm
        # which of the two is intended.
        if "#" in element.element_id:
            element_id = split_uri(element.element_id)[1]
        else:
            element_id = element.element_id

        justifications = element.informative_justifications + element.justified_by
        rendered = self.render_justifications(justifications)
        text_line = f"{rendered}\t{element.prototypes}\t{element.members}\t{element.clusters}\t{element.names}\t{element.handles}\t{justifications}"

        statements = self.render_statements(element.statements) if element.statements else ""

        return f"{element_type}\t{element.element_id}\t{statements}\t{text_line}"
Esempio n. 42
0
    def img(self):
        """Return the image name for this object.

        When ``static/img/<name>.svg`` exists (``<name>`` being the local
        part of ``self.uri``), that name is returned.  Otherwise a
        ``SuperEdgeBasedGraph`` is built from the neighborhood, its ``dot``
        method is called (presumably rendering the image - confirm) and the
        graph's ``name`` is returned.
        """
        import os.path
        name = split_uri(self.uri)[1]
        if os.path.isfile('static/img/' + name + '.svg'):
            return name

        from graph import SuperEdgeBasedGraph
        neighborhood_graph = SuperEdgeBasedGraph(self.neighborhood(), self, self.uri)
        # Called for its effect only; the returned value is not needed here
        neighborhood_graph.dot()
        return neighborhood_graph.name
Esempio n. 43
0
def rightmost_element(uri: URIRef) -> str:
    r"""
    Extract the rightmost dotted element of a URI's local name, in path form.
    Example: CodedEntry.code.text --> \text\
             CodedEntry           --> \
    :param uri: input URI
    :return: rightmost element in path form
    """
    local_name = split_uri(uri)[1]
    _, dot, last_element = local_name.rpartition('.')
    if not dot:
        # Nothing after a dot: only the leading separator remains
        return '\\'
    return '\\' + last_element + '\\'
Esempio n. 44
0
	def get_transitions(self, control):
		"""Create and return the transitions affected by ``control``.

		A left-to-right transition is created when the direction is
		left-to-right or reversible, a right-to-left one when it is
		right-to-left or reversible, and one with ``Direction.unknown`` when
		the direction is falsy.  Each transition is named after the local
		part of ``control.controlled`` and receives the local part of
		``control.interaction`` as third argument.
		"""
		direction = self.get_direction(control)
		control_id = split_uri(control.interaction)[1]
		conversion_id = split_uri(control.controlled)[1]

		# Decide which directions are needed, in the order they are created
		planned = []
		if direction == Direction.left_to_right or direction == Direction.reversible:
			planned.append(Direction.left_to_right)
		if direction == Direction.right_to_left or direction == Direction.reversible:
			planned.append(Direction.right_to_left)
		if not direction:
			planned.append(Direction.unknown)

		return [
			self.net.create_transition(conversion_id, each, control_id)
			for each in planned
		]
Esempio n. 45
0
 def ident_to_rel_type(self, identifier):
     """Return the relationship type for ``identifier`` as UTF-8 bytes.

     ``identifier`` is split into namespace and local name.  The namespace's
     prefix is looked up with ``self.prefix``; when none exists, a prefix
     derived from a fresh UUID is generated and bound.  A non-empty prefix
     is joined to the local name as ``prefix:name``.
     """
     namespace, rel_type = split_uri(identifier)
     prefix = self.prefix(namespace)
     if prefix is None:
         prefix = str(uuid.uuid1()).replace("-", "_")
         # Bind the generated prefix to the namespace.  It used to be bound
         # to the local name, so the next ``self.prefix(namespace)`` lookup
         # missed it and a new prefix was generated on every call.
         self.bind(prefix, namespace)

     if prefix != "":
         rel_type = ":".join((prefix, rel_type))

     return rel_type.encode('utf-8')
Esempio n. 46
0
 def _collect_vocab_terms(self, graph, ns):
     """Store the sorted local names of the terms in ``ns`` found in ``graph``.

     Candidates are the subjects of ``rdf:type`` or ``rdfs:isDefinedBy``
     statements.  A subject counts when ``split_uri`` puts it in ``ns`` and
     its local name is non-empty.  The result is written to
     ``self._terms_by_ns[ns]``.
     """
     terms = set()
     items = set(graph.subjects(RDF.type | RDFS.isDefinedBy, None))
     for subject in items:
         try:
             uri, leaf = split_uri(subject)
             if uri == unicode(ns) and leaf:
                 terms.add(leaf)
         except Exception:
             # Best effort: a subject that cannot be processed is skipped.
             # Only ``Exception`` is caught so KeyboardInterrupt and
             # SystemExit are no longer silently swallowed.
             continue
     self._terms_by_ns[ns] = sorted(terms)
Esempio n. 47
0
 def _collect_vocab_terms(self, graph, ns):
     """Collect the local names of ``ns`` terms in ``graph``, sorted.

     Subjects of ``rdf:type`` or ``rdfs:isDefinedBy`` statements are split
     with ``split_uri``; those whose namespace equals ``ns`` and whose local
     name is non-empty are kept.  The sorted names are stored in
     ``self._terms_by_ns[ns]``.
     """
     terms = set()
     items = set(graph.subjects(RDF.type | RDFS.isDefinedBy, None))
     for subject in items:
         try:
             uri, leaf = split_uri(subject)
             if uri == unicode(ns) and leaf:
                 terms.add(leaf)
         except Exception:
             # Skipping is deliberate, but catching ``Exception`` rather
             # than everything lets KeyboardInterrupt/SystemExit through.
             continue
     self._terms_by_ns[ns] = sorted(terms)
Esempio n. 48
0
	def add_control(self, control):
		"""Connect the control's participant to every transition it controls.

		A place is created for ``control.participant`` (named after the local
		part of its URI), the transitions are obtained from
		``get_transitions`` and each of them is connected to that place.
		"""
		# Place for the new controller or cofactor
		participant_id = split_uri(control.participant)[1]
		place = self.net.create_place(participant_id, control.participantName)

		# Every transition that is an instance of the controlled interaction
		for transition in self.get_transitions(control):
			self.connect(transition, place)
Esempio n. 49
0
 def shrink(self, iri):
     """Return the most compact known form of ``iri``.

     In order of preference: the key of the term mapped to the full IRI in
     ``self._iri_map``; ``self.type_key`` when the IRI equals ``RDF_TYPE``;
     ``<term key>:<local name>`` when the namespace part of the IRI is
     mapped; otherwise the IRI itself.
     """
     iri = unicode(iri)
     term = self._iri_map.get(iri)
     if term:
         return term.key
     if iri == RDF_TYPE:
         # NOTE: only if no term for the rdf:type IRI is defined
         return self.type_key
     try:
         ns, name = split_uri(iri)
         term = self._iri_map.get(ns)
         if term:
             return ":".join((term.key, name))
     except Exception:
         # An IRI that cannot be split or joined is returned unchanged.
         # Catching ``Exception`` instead of everything keeps
         # KeyboardInterrupt and SystemExit from being swallowed.
         pass
     return iri
Esempio n. 50
0
File: utils.py Progetto: dmr/Ldtools
def predicate2pyattr(predicate, namespace_short_notation_reverse_dict):
    """Turn a predicate URI into an attribute name.

    The predicate is split into namespace and property name, both of which
    must be non-empty.  When the namespace has no entry in
    ``namespace_short_notation_reverse_dict`` a warning is logged and the
    predicate is returned unchanged.  An empty short notation yields the
    bare property name, any other yields ``<short>_<property name>``.
    """
    prefix, propertyname = split_uri(predicate)
    assert prefix
    assert propertyname

    if prefix in namespace_short_notation_reverse_dict:
        short_notation = namespace_short_notation_reverse_dict[prefix]
        if short_notation == "":
            return propertyname
        return u"%s_%s" % (short_notation, propertyname)

    logger.warning("%s cannot be shortened" % predicate)
    return predicate
Esempio n. 51
0
def trig(test):
    """Run one TriG test described by ``test``.

    ``test.action`` is parsed as TriG into a ``ConjunctiveGraph``.  When
    ``test.result`` is set, it is parsed as N-Quads and the two graphs must be
    isomorphic; with the module-level ``verbose`` flag a diff is printed
    before failing.  Any exception is re-raised only when ``test.syntax`` is
    truthy; otherwise it is swallowed.
    """
    g = ConjunctiveGraph()

    try:
        # Base IRI: fixed test-suite prefix plus the local part of the action URI
        base = 'http://www.w3.org/2013/TriGTests/'+split_uri(test.action)[1]

        g.parse(test.action, publicID=base, format='trig')
        if not test.syntax:
            # NOTE(review): this is raised inside the try while test.syntax is
            # falsy, so the handler at the bottom swallows it - confirm whether
            # a wrongly accepted input is really meant to pass silently.
            raise AssertionError("Input shouldn't have parsed!")

        if test.result: # eval test
            res = ConjunctiveGraph()
            res.parse(test.result, format='nquads')

            if verbose:


                # Nothing to report when neither side has extra statements
                both, first, second = graph_diff(g,res)
                if not first and not second: return

                print('===============================')
                print('TriG')
                print(g.serialize(format='nquads'))
                print('===============================')
                print('NQuads')
                print(res.serialize(format='nquads'))
                print('===============================')

                print("Diff:")
                #print "%d triples in both"%len(both)
                print("TriG Only:")
                for t in first:
                    print(t)

                print("--------------------")
                print("NQuads Only")
                for t in second:
                    print(t)
                raise Exception('Graphs do not match!')

            assert isomorphic(g, res), 'graphs must be the same'

    except:
        # Failures only count when test.syntax is truthy; otherwise the
        # exception is dropped.
        if test.syntax:
            raise
Esempio n. 52
0
def normalizeUri(rdfTerm,revNsMap):
    """
    Takes an RDF Term and 'normalizes' it into a QName (using the registered prefix)
    or (unlike compute_qname) the Notation 3 form for URIs: <...URI...>

    A term that cannot be split, or whose namespace has no prefix in
    ``revNsMap``, is rendered as ``?name`` when it is a ``Variable`` and as
    ``<term>`` otherwise.
    """
    try:
        namespace, _ = split_uri(rdfTerm)
        namespace = URIRef(namespace)
    except Exception:
        # Unsplittable terms take the same fallback as unknown namespaces.
        # Only ``Exception`` is caught so KeyboardInterrupt and SystemExit
        # still propagate.
        prefix = None
    else:
        prefix = revNsMap.get(namespace)

    if prefix is None:
        if isinstance(rdfTerm, Variable):
            return "?%s" % rdfTerm
        return "<%s>" % rdfTerm

    qNameParts = compute_qname(rdfTerm, revNsMap)
    return ':'.join([qNameParts[0], qNameParts[-1]])
Esempio n. 53
0
    def ident_to_node_def(self, identifier):
        """Map an RDF identifier to a ``(node_type, node_id)`` pair.

        For a ``URIRef`` the namespace/id split happens at the fragment when
        the URI contains ``#`` and via ``split_uri`` otherwise; the node type
        is the prefix of the namespace, generated from a fresh UUID and bound
        when none exists yet.  For a ``BNode`` the node type is
        ``BNODE_NODE_TYPE`` and the id is the UTF-8 encoded identifier.
        Anything else raises ``ValueError``.
        """
        if isinstance(identifier, URIRef):
            if "#" not in identifier:
                # no fragment: best guess using split_uri logic
                namespace, node_id = split_uri(identifier)
            else:
                # split at the fragment and keep the '#' on the namespace
                base, node_id = urldefrag(identifier)
                namespace = base + "#"

            node_type = self.prefix(namespace)
            if node_type is None:
                node_type = str(uuid.uuid1()).replace("-", "_")
                self.bind(node_type, namespace)
            return node_type, node_id

        if isinstance(identifier, BNode):
            # Bnodes get their own table
            return BNODE_NODE_TYPE, identifier.encode("utf-8")

        raise ValueError("Unknown identifier type %r" % identifier)
Esempio n. 54
0
 def get_types(self, localName=False):
     """Return the objects of all ``rdf:type`` triples held by this instance.

     With a truthy ``localName`` only the local part of each type (second
     element returned by ``split_uri``) is returned.
     """
     type_objects = [o for s, p, o in self.__triples if p == RDF.type]
     if localName:
         return [split_uri(each)[1] for each in type_objects]
     return type_objects
 def __repr__(cls):
     """Return ``"class "`` followed by the local part of ``cls.uri``."""
     _, local_name = split_uri(cls.uri)
     return "class " + local_name
 def __repr__(self):
     """Return ``"object "`` followed by the local part of ``self.uri``."""
     _, local_name = split_uri(self.uri)
     return "object " + local_name
 def export_case(self, graph_description, format_):
     """
     Endpoint for handling the storage of a complete case to the KR.

     ``graph_description`` is parsed as RDF in ``format_``.  The graph is
     rejected with ``RuntimeError`` when it contains blank nodes, when a
     subject/predicate pair points to more than one literal, when it does
     not contain exactly one ``orion:Case``, when a subject is a literal,
     when a predicate's local name is ``uri`` or when a type lies outside
     ``self.orion_ns``.  The existing case is deleted first; then every
     triple is written through ``self.query``: types become node labels,
     literals become node properties and all other triples relationships.
     """
     def quote_identifier(name):
         # Labels, property names and relationship types cannot be passed
         # as Cypher parameters, so they are spliced into the query text.
         # Doubling embedded backticks keeps the value inside the quoted
         # identifier.
         # NOTE(review): verify the escaping rules of the Neo4j version in
         # use before relying on this for hostile input.
         return "`" + name.replace("`", "``") + "`"

     case_graph = rdflib.Graph()
     case_graph.parse(data=graph_description, format=format_)

     check_blank_node_absence_query = """SELECT ?s ?p ?o
                                         WHERE {
                                             ?s ?p ?o .
                                             FILTER(isBlank(?s) || isBlank(?o))
                                         }
                                     """
     query_result = list(case_graph.query(check_blank_node_absence_query))
     if len(query_result) != 0:
         raise RuntimeError("Blank node are not handled when exporting data to the knowledge repository, but {0} were found."
                            .format(len(query_result)))

     check_unique_literal_query = """SELECT ?s ?p (count(?o) as ?count)
                                     WHERE {
                                         ?s ?p ?o .
                                         FILTER(isLiteral(?o))
                                     }
                                     GROUP BY ?s ?p
                                     HAVING (count(?o) > 1)
                                 """
     query_result = list(case_graph.query(check_unique_literal_query))
     if len(query_result) != 0:
         # Only the first offending pair is spelled out in the message
         subject_uri = str(query_result[0][0].toPython())
         predicate = str(query_result[0][1].toPython())
         literal_count = str(query_result[0][2].toPython())
         other_uri_predicate_pair_count = str(len(query_result) - 1)
         raise RuntimeError("The graph must not contain an uri linked to several literals with the same predicate, but the uri " +
                            subject_uri + " is linked to " + literal_count + " literals by the predicate " + predicate +
                            ". There is " + other_uri_predicate_pair_count + " other uri-predicate pair in the same case in the graph.")

     with self.open_session() as session:
         # The case is deleted from the knowledge repository to handle suppressed nodes from the database
         case_uri = case_graph.query("SELECT ?case_uri WHERE {?case_uri a orion:Case.}", initNs={"orion": rdflib.Namespace(self.orion_ns)})
         if len(case_uri) != 1:
             raise RuntimeError("There must be exactly one case in the provided graph, but {0} were found.".format(len(case_uri)))
         case_uri = list(case_uri)[0][0].toPython()
         self.delete_case(case_uri, session)

         for s, p, o in case_graph:
             if isinstance(s, rdflib.term.Literal):
                 raise RuntimeError("A subject must not be a Literal")

             predicate_name = split_uri(p)[1]
             if predicate_name == "uri":
                 raise RuntimeError("Can not handle triplet whose predicate name is 'uri', as it is already used for the identifier " +
                                    "property in neo4j. Triplet is :({0}, {1}, {2}).".format(s, p, o))

             if predicate_name == "type":
                 if not str(o).startswith(self.orion_ns):
                     raise RuntimeError("The type of a node must be in the ontology namespace")
                 label = str(o)[len(self.orion_ns):]
                 # Can not use a parameter for label, as it is not supported in neo4j
                 query = "MERGE (node {uri: $uri}) SET node :" + quote_identifier(label)
                 self.query(query, {"uri": str(s)}, session)
                 continue

             quoted_predicate = quote_identifier(predicate_name)

             if isinstance(o, rdflib.term.Literal):
                 # Can not use the name of the property as a parameter, as it is not supported in neo4j
                 query = "MERGE (node {uri: $uri}) SET node." + quoted_predicate + " = $value"
                 self.query(query, {"uri": str(s), "value":o.toPython()}, session)
                 continue

             query = """ MERGE (subject_node {uri: $subject_uri}) 
                         MERGE (object_node {uri: $object_uri})
                         MERGE (subject_node) -[:""" + quoted_predicate + """]-> (object_node)
                     """
             self.query(query, {"subject_uri": str(s), "object_uri": str(o)}, session)
Esempio n. 58
0
    def convert(self, csvreader):
        """Convert the rows of ``csvreader`` into triples emitted via ``self.triple``.

        After skipping ``self.SKIP`` lines, the header row is read and turned
        into property URIs under ``self.PROPBASE`` (overridden per column by
        ``self.PROPS``).  With ``self.DEFINECLASS`` set, class and property
        definitions are emitted first.  Each following row gets a subject URI
        under ``self.BASE`` (the running row number when ``self.IDENT`` is
        ``'auto'``, otherwise built from the identifying columns) and one
        triple per non-empty, non-ignored cell.  Finally labels and types are
        emitted for the entries of the module-level ``uris`` mapping and
        ``self.OUT`` is closed.

        A cell that cannot be converted produces a warning and is skipped;
        any other error while processing a row is reported on stderr and
        re-raised.
        """
        start = time.time()

        if self.OUT:
            sys.stderr.write("Output to %s\n" % self.OUT.name)

        if self.IDENT != "auto" and not isinstance(self.IDENT, tuple):
            self.IDENT = (self.IDENT,)

        if not self.BASE:
            warnings.warn("No base given, using http://example.org/instances/")
            self.BASE = rdflib.Namespace("http://example.org/instances/")

        if not self.PROPBASE:
            # The message names the namespace that is actually used below
            warnings.warn(
                "No property base given, using http://example.org/props/")
            self.PROPBASE = rdflib.Namespace("http://example.org/props/")

        # skip lines at the start
        for _ in range(self.SKIP):
            next(csvreader)

        # read header line; the next() builtin works with iterators on both
        # Python 2 and 3, unlike the Python-2-only ``.next()`` method
        header_labels = list(next(csvreader))
        headers = dict(
            enumerate([self.PROPBASE[toProperty(x)] for x in header_labels]))
        # override header properties if some are given
        for k, v in self.PROPS.items():
            headers[k] = v
            header_labels[k] = split_uri(v)[1]

        if self.DEFINECLASS:
            # output class/property definitions
            self.triple(self.CLASS, RDF.type, RDFS.Class)
            for i in range(len(headers)):
                prop, label = headers[i], header_labels[i]
                if prop == "" or label == "":
                    continue
                if self.COLUMNS.get(i, self.DEFAULT) == 'ignore':
                    continue
                self.triple(prop, RDF.type, RDF.Property)
                self.triple(prop, RDFS.label, rdflib.Literal(toPropertyLabel(label)))
                self.triple(prop, RDFS.domain, self.CLASS)
                self.triple(prop, RDFS.range,
                            self.COLUMNS.get(i, default_node_make).range())

        rows = 0
        for row in csvreader:
            try:
                if self.IDENT == 'auto':
                    uri = self.BASE["%d" % rows]
                else:
                    uri = self.BASE["_".join([quote(x.encode(
                        "utf8").replace(" ", "_"), safe="")
                        for x in index(row, self.IDENT)])]

                if self.LABEL:
                    self.triple(uri, RDFS.label, rdflib.Literal(
                        " ".join(index(row, self.LABEL))))

                if self.CLASS:
                    # type triple
                    self.triple(uri, RDF.type, self.CLASS)

                for i, x in enumerate(row):
                    x = x.strip()
                    if x != '':
                        if self.COLUMNS.get(i, self.DEFAULT) == 'ignore':
                            continue
                        try:
                            o = self.COLUMNS.get(i, rdflib.Literal)(x)
                            if isinstance(o, list):
                                for _o in o:
                                    self.triple(uri, headers[i], _o)
                            else:
                                self.triple(uri, headers[i], o)

                        except Exception as e:
                            # Format the exception itself: ``e.message`` does
                            # not exist on Python 3 and would turn this
                            # warning into an AttributeError.
                            warnings.warn(
                                "Could not process value for column " +
                                "%d:%s in row %d, ignoring: %s " % (
                                    i, headers[i], rows, e))

                rows += 1
                if rows % 100000 == 0:
                    sys.stderr.write(
                        "%d rows, %d triples, elapsed %.2fs.\n" % (
                            rows, self.triples, time.time() - start))
            except:
                # Report the failing row, then let the error propagate
                sys.stderr.write("Error processing line: %d\n" % rows)
                raise

        # output types/labels for generated URIs
        classes = set()
        for label, entry in uris.items():
            u, c = entry
            self.triple(u, RDFS.label, rdflib.Literal(label))
            if c:
                c = rdflib.URIRef(c)
                classes.add(c)
                self.triple(u, RDF.type, c)

        for c in classes:
            self.triple(c, RDF.type, RDFS.Class)

        self.OUT.close()
        sys.stderr.write(
            "Converted %d rows into %d triples.\n" % (rows, self.triples))
        sys.stderr.write("Took %.2f seconds.\n" % (time.time() - start))
Esempio n. 59
0
 def _relabel_predicate(self, predicate):
     """Return the field name for ``predicate``.

     A plain ``unicode`` value is first wrapped in ``URIRef``.  The name
     comes from ``FIELD_NAMES`` when the predicate is listed there and is
     the local part returned by ``split_uri`` otherwise.
     """
     if type(predicate) is unicode:
         predicate = URIRef(predicate)
     # The fallback is always computed, exactly as when passed inline
     fallback = split_uri(predicate)[1]
     return FIELD_NAMES.get(predicate, fallback)
Esempio n. 60
0
    def get(self, uriref, simplify=True):
        """Download all related information for `uriref` from the remote
        data source.

        Collects all the triples from the remote data source where `uriref` is
        the subject. If `uriref` is not the subject of any triples, it is re-queried
        as a predicate, storing the subject-object pairs.

        Any objects (and subjects) which are themselves :class:`rdflib.term.URIRef` instances
        will be converted into :class:`BoundURIRef`.

        If the predicate is found in ``predicate_processor_map``, the object value
        is replaced by whatever the processor returns for it; a result of ``None``
        drops the value.

        The finished entity is stored in ``self.cache`` and returned from there
        on later calls with the same `uriref`.

        Parameters
        ----------
        uriref: str or rdflib.term.URIRef
            A subject or predicate.
        simplify: bool, optional
            If true, any predicate with a single value will be a scalar,
            and any other will be a list.

        Returns
        -------
        ReferenceEntity
            An object representing the subject whose attributes are named after
            predicates with their objects as values.
        """
        if not isinstance(uriref, URIRef):
            uriref = self.accession_to_uriref(uriref)
        if uriref in self.cache:
            return self.cache[uriref]
        results = defaultdict(list)
        for subject, predicate, obj in set(self.triples((uriref, None, None))):
            predicate_name = _camel_to_snake(split_uri(predicate)[1])
            self._predicates_seen.add(predicate)
            if isinstance(obj, Literal):
                obj = obj.toPython()
            elif isinstance(obj, URIRef):
                obj = BoundURIRef(obj, source=self)
            if predicate in self.predicate_processor_map:
                obj = self.predicate_processor_map(predicate, results, obj)
            if obj is not None:
                results[predicate_name].append(obj)

        # If there were no results, the query might be a predicate, so try to find all the
        # pairs that satisfy it.
        if not results:
            predicate_name = _camel_to_snake(split_uri(uriref)[1])
            for subject, predicate, obj in set(self.triples((None, uriref, None))):
                if isinstance(obj, Literal):
                    obj = obj.toPython()
                elif isinstance(obj, URIRef):
                    obj = BoundURIRef(obj, source=self)

                if isinstance(subject, Literal):
                    subject = subject.toPython()
                elif isinstance(subject, URIRef):
                    subject = BoundURIRef(subject, source=self)

                results[predicate_name].append((subject, obj))

        if simplify:
            results = {k: v if len(v) > 1 else v[0] for k, v in results.items()}
        results = ReferenceEntity(uriref, **results)
        # Make room if needed, then always remember the result.  The store
        # used to be nested in the eviction branch, which left the cache
        # empty unless it was already over its size limit.
        # NOTE(review): entries are keyed by uriref only, so the ``simplify``
        # mode of the first call wins - confirm that is acceptable.
        if len(self.cache) > self.cache_size:
            self.cache.popitem()
        self.cache[uriref] = results
        return results