예제 #1
0
 def get_ext_ident_object(self, triple_store, subject, field):
     """Collect identifier values for *subject* from *triple_store*.

     Every object found for ``(subject, field.predicate)`` is taken; an
     object that passes ``_is_valid_uri`` is reduced to the text after its
     last ``/``, any other object is kept unchanged.

     If no object is found and *subject* itself passes ``_is_valid_uri``,
     the last path segment of *subject* is returned as the only identifier.

     Returns a list, which may be empty.
     """
     identifiers = []
     matches = triple_store.objects(subject=subject,
                                    predicate=field.predicate)
     for obj in matches:
         if _is_valid_uri(obj):
             identifiers.append(obj.split('/')[-1])
         else:
             identifiers.append(obj)

     if identifiers:
         return identifiers

     # Nothing stored for this predicate: fall back to the subject URI.
     if _is_valid_uri(subject):
         return [subject.split('/')[-1]]
     return identifiers
예제 #2
0
    def compute_qname(self, uri, generate=True):
        """Return the ``(prefix, namespace, name)`` tuple for *uri*.

        Results are memoized in the instance cache. On a cache miss the URI
        is split with ``split_uri`` and the store is asked for the prefix of
        the resulting namespace. When the store knows none and *generate* is
        true, the first ``ns<N>`` prefix (N = 1, 2, ...) for which the store
        reports no namespace is bound to it.

        Raises
        ------
        ValueError
            If ``_is_valid_uri(uri)`` is false. This is checked before the
            cache is consulted.
        KeyError
            If the namespace has no prefix and *generate* is false.
        """
        if not _is_valid_uri(uri):
            message = '"{}" does not look like a valid URI, cannot serialize this. Did you want to urlencode it?'.format(uri)
            raise ValueError(message)

        if uri in self.__cache:
            return self.__cache[uri]

        ns_text, local_name = split_uri(uri)
        namespace = URIRef(ns_text)
        prefix = self.store.prefix(namespace)

        if prefix is None:
            if not generate:
                raise KeyError(
                    "No known prefix for {} and generate=False".format(namespace)
                )
            # Probe ns1, ns2, ... until one is not yet bound in the store.
            counter = 1
            prefix = "ns%s" % counter
            while self.store.namespace(prefix):
                counter += 1
                prefix = "ns%s" % counter
            self.bind(prefix, namespace)

        self.__cache[uri] = (prefix, namespace, local_name)
        return self.__cache[uri]
예제 #3
0
    def __get_uri_from_graphfile_blob(self, oid):
        """Read a graph URI out of a graph file blob.

        Args
        ----
           oid: String oid of a graph file

        Returns
        -------
            graphuri: String with the graph URI, or None when looking up
            the oid in the repository raises ValueError

        Raises
        ------
            InvalidConfigurationError: the stripped blob content is empty
            or does not pass ``_is_valid_uri``

        """
        try:
            graph_blob = self.repository.get(oid)
        except ValueError:
            logger.debug(
                "Object with OID {} not found in repository.".format(oid))
            return None

        raw_bytes = graph_blob.read_raw()
        candidate = raw_bytes.decode().strip()

        if not candidate or not _is_valid_uri(candidate):
            raise InvalidConfigurationError(
                "No graph URI found in blob with OID {}.".format(oid))
        return candidate
예제 #4
0
    def _prepare_object(self, prop: str, prop_type: str, value: Any) -> rdflib.term.Identifier:
        """
        Build the RDF term used as the object of a triple.

        Parameters
        ----------
        prop: str
            property name (not consulted by this method)
        prop_type: str
            property type; ``'uriorcurie'`` and ``'xsd:anyURI'`` may yield a
            URI reference, any other value yields a typed literal
        value: Any
            property value

        Returns
        -------
        rdflib.term.Identifier
            A ``URIRef`` for CURIEs and valid IRIs of a URI-like type,
            otherwise a ``Literal``

        """
        if prop_type in ('uriorcurie', 'xsd:anyURI'):
            if not isinstance(value, str):
                return Literal(value)
            if PrefixManager.is_curie(value):
                return self.uriref(value)
            # An IRI that fails URI validation is stored as a plain literal.
            if PrefixManager.is_iri(value) and _is_valid_uri(value):
                return URIRef(value)
            return Literal(value)

        # Everything that is not an explicit xsd type is typed as xsd:string.
        if prop_type.startswith('xsd'):
            datatype = self.prefix_manager.expand(prop_type)
        else:
            datatype = self.prefix_manager.expand("xsd:string")
        return Literal(value, datatype=datatype)
예제 #5
0
    def compute_qname(self, uri, generate=True):
        """Return the ``(prefix, namespace, name)`` tuple for *uri*.

        Results are memoized in the instance cache. On a cache miss the URI
        is split with ``split_uri``; if a longer registered namespace that
        prefixes the URI is found in the namespace trie, it replaces the
        split result. When the store knows no prefix for the namespace and
        *generate* is true, the first unused ``ns<N>`` prefix is bound.

        A URI that ``split_uri`` cannot split is accepted only when the
        whole URI is itself bound to a prefix. In that case the namespace is
        the full URI and the local name is the empty string.

        Raises
        ------
        ValueError
            If ``_is_valid_uri(uri)`` is false, or if the URI cannot be
            split and is not itself bound to a prefix.
        KeyError
            If the namespace has no prefix and *generate* is false.
        """
        if not _is_valid_uri(uri):
            raise ValueError(
                '"{}" does not look like a valid URI, cannot serialize this. Did you want to urlencode it?'
                .format(uri))

        if uri not in self.__cache:
            try:
                namespace, name = split_uri(uri)
            except ValueError:
                namespace = URIRef(uri)
                if not self.store.prefix(namespace):
                    raise
                # The whole URI is the namespace, so nothing is left for the
                # local part. Previously ``name`` stayed unbound here and the
                # cache write below failed with UnboundLocalError unless a
                # longer namespace happened to be found in the trie.
                name = ""

            if namespace not in self.__strie:
                insert_strie(self.__strie, self.__trie, namespace)

            if self.__strie[namespace]:
                pl_namespace = get_longest_namespace(self.__strie[namespace],
                                                     uri)
                if pl_namespace is not None:
                    namespace = pl_namespace
                    name = uri[len(namespace):]

            namespace = URIRef(namespace)
            prefix = self.store.prefix(
                namespace)  # warning multiple prefixes problem

            if prefix is None:
                if not generate:
                    raise KeyError(
                        "No known prefix for {} and generate=False".format(
                            namespace))
                # Probe ns1, ns2, ... until one is not yet bound in the store.
                num = 1
                while True:
                    prefix = "ns%s" % num
                    if not self.store.namespace(prefix):
                        break
                    num += 1
                self.bind(prefix, namespace)
            self.__cache[uri] = (prefix, namespace, name)
        return self.__cache[uri]
예제 #6
0
    def compute_qname(self, uri, generate=True):
        """Return the ``(prefix, namespace, name)`` tuple for *uri*.

        Results are memoized in the instance cache. On a cache miss the URI
        is split with ``split_uri`` and the store is asked for the prefix of
        the resulting namespace. When the store knows none and *generate* is
        true, the first ``ns<N>`` prefix (N = 1, 2, ...) for which the store
        reports no namespace is bound to it.

        Raises
        ------
        Exception
            If ``_is_valid_uri(uri)`` is false, or if the namespace has no
            prefix and *generate* is false.
        """
        if not _is_valid_uri(uri):
            raise Exception('"%s" does not look like a valid URI, I cannot serialize this. Perhaps you wanted to urlencode it?'%uri)

        if uri not in self.__cache:
            namespace, name = split_uri(uri)
            namespace = URIRef(namespace)
            prefix = self.store.prefix(namespace)
            if prefix is None:
                if not generate:
                    # The namespace is interpolated here; the message used
                    # to be raised with a literal, unformatted "%s".
                    raise Exception(
                        "No known prefix for %s and generate=False"
                        % namespace)
                # Probe ns1, ns2, ... until one is not yet bound in the store.
                num = 1
                while True:
                    prefix = "ns%s" % num
                    if not self.store.namespace(prefix):
                        break
                    num += 1
                self.bind(prefix, namespace)
            self.__cache[uri] = (prefix, namespace, name)
        return self.__cache[uri]
예제 #7
0
파일: conf.py 프로젝트: AKSW/QuitStore
    def __get_uri_from_graphfile_blob(self, oid):
        """Extract the graph URI stored in a graph file blob.

        Args
        ----
           oid: String oid of a graph file

        Returns
        -------
            graphuri: String with the graph URI; None if the repository
            lookup for the oid raises ValueError

        Raises
        ------
            InvalidConfigurationError: the blob's stripped text is empty or
            is rejected by ``_is_valid_uri``

        """
        try:
            found = self.repository.get(oid)
        except ValueError:
            logger.debug("Object with OID {} not found in repository.".format(oid))
            return None

        text = found.read_raw().decode()
        graph_uri = text.strip()

        looks_like_uri = graph_uri and _is_valid_uri(graph_uri)
        if not looks_like_uri:
            raise InvalidConfigurationError("No graph URI found in blob with OID {}.".format(oid))
        return graph_uri
    def compute_qname(self, uri, generate=True):
        """Return the ``(prefix, namespace, name)`` tuple for *uri*.

        Results are memoized in the instance cache. On a cache miss the URI
        is split with ``split_uri`` and the store is asked for the prefix of
        the resulting namespace. When the store knows none and *generate* is
        true, the first ``ns<N>`` prefix (N = 1, 2, ...) for which the store
        reports no namespace is bound to it.

        Raises
        ------
        Exception
            If ``_is_valid_uri(uri)`` is false, or if the namespace has no
            prefix and *generate* is false.
        """
        if not _is_valid_uri(uri):
            raise Exception('"%s" does not look like a valid URI, I cannot serialize this. Perhaps you wanted to urlencode it?'%uri)

        if uri not in self.__cache:
            namespace, name = split_uri(uri)
            namespace = URIRef(namespace)
            prefix = self.store.prefix(namespace)
            if prefix is None:
                if not generate:
                    # The namespace is interpolated here; the message used
                    # to be raised with a literal, unformatted "%s".
                    raise Exception(
                        "No known prefix for %s and generate=False"
                        % namespace)
                # Probe ns1, ns2, ... until one is not yet bound in the store.
                num = 1
                while True:
                    prefix = "ns%s" % num
                    if not self.store.namespace(prefix):
                        break
                    num += 1
                self.bind(prefix, namespace)
            self.__cache[uri] = (prefix, namespace, name)
        return self.__cache[uri]
예제 #9
0
 def isValidURI(self, str_uri):
     """Check *str_uri* with ``term._is_valid_uri`` and ``self.isascii``.

     The ASCII check runs only when the URI check succeeds. The result of
     whichever check ran last is returned.
     """
     # Original author's note: use term._is_valid_unicode(str_uri)
     uri_ok = term._is_valid_uri(str_uri)
     if not uri_ok:
         return uri_ok
     return self.isascii(str_uri)
예제 #10
0
    def _validate(self, value):
        """Run the parent validation, then require *value* to be a valid URI.

        Raises ``InvalidURI`` (carrying *value*) when ``_is_valid_uri``
        rejects it.
        """
        super(URIRefField, self)._validate(value)

        if _is_valid_uri(value):
            return
        raise InvalidURI(value)
예제 #11
0
def __cli_parse__(args):
    """Convert the results of a test suite profile to RDF and serialize them.

    Attributes read from *args*: ``profile`` (path of the test suite
    directory), ``prefix`` (URI prefix for the generated resources),
    ``semrep`` (name of the semantic representation to convert to),
    ``verbosity``, ``output`` and ``format`` (the last two are passed to the
    graph serializer).

    For every selected result, the item, result and semantic-representation
    nodes are typed and linked in a default graph, then the converter
    returned by ``_get_converters`` adds the representation itself.

    Any exception raised during validation, conversion or serialization is
    caught and reported through ``logger.error``; nothing is re-raised.
    """
    import logging  # local import: supplies the named level constants below

    # Open questions from the original author:
    # remove the not well formed sentences? add option?
    # print MRS or parse to DMRS format?

    path = args.profile
    prefix = args.prefix.strip("/")
    semrep = args.semrep.lower()
    # Setting verbosity; need to figure a better solution.
    if args.verbosity == 1:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 2:
        logger.setLevel(logging.DEBUG)

    try:
        # validates path
        if not isdir(path):
            raise NotADirectoryError(f"Path is not a directory: {path}")
        # validates profile
        if not is_database_directory(path):
            raise TSDBError(f'Invalid test suite directory: {path}')
        # validates URI prefix
        if not _is_valid_uri(prefix):
            raise Exception(f'Invalid URI: {prefix}')
        # validate format and get converter
        to_rdf, from_mrs = _get_converters(semrep)

        # open Test Suite and start conversion
        ts = itsdb.TestSuite(path)
        logger.info(
            f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}"
        )

        # Creating the store and the default graph
        store = plugin.get("IOMemory", Store)()
        defaultGraph = Graph(store, identifier=BNode())
        PROFILE = URIRef(prefix)  # review later
        defaultGraph.add((PROFILE, RDF.type, DELPH.Profile))
        semrepURI, prof_semrep_relation = _get_RDF_semrep(semrep, store)
        store.bind("erg", ERG)
        store.bind("delph", DELPH)
        store.bind("pos", POS)
        # store.bind("upref", prefix) # may be useful

        # The tsql takes some time to be processed:
        logger.info("Loading the profile")
        profile_data = tsql.select('parse-id result-id i-input mrs', ts)
        logger.info("Converting the profile")
        # Iterating over the results:
        for (parse_id, result_id, text, mrs_string) in profile_data:
            logger.debug(
                f"Converting the result {result_id} of sentence {parse_id}")
            m = simplemrs.decode(mrs_string)

            # Ill-formed results are only reported; they are still converted
            # (the original author left a disabled `continue` here).
            if not is_well_formed(m):
                logger.warning(
                    f"Result {result_id} of sentence {parse_id} is not well formed"
                )

            # converting the MRS object to the representation intended to be converted
            obj = from_mrs(m)

            # Creating URIs for relevant resources.
            # The item part may be redundant, maybe iterate before the items
            ITEM = URIRef(f"{prefix}/{parse_id}")
            RESULT = URIRef(f"{prefix}/{parse_id}/{result_id}")
            SEMREPI = URIRef(f"{prefix}/{parse_id}/{result_id}/{semrep}")

            # adding types:
            defaultGraph.add((ITEM, RDF.type, DELPH.Item))
            defaultGraph.add((RESULT, RDF.type, DELPH.Result))
            defaultGraph.add((SEMREPI, RDF.type, semrepURI))

            # Associating text to item:
            defaultGraph.add((ITEM, DELPH.hasText, Literal(text)))

            # Linking those nodes:
            defaultGraph.add((PROFILE, DELPH.hasItem, ITEM))
            defaultGraph.add((ITEM, DELPH.hasResult, RESULT))
            defaultGraph.add((RESULT, prof_semrep_relation, SEMREPI))

            to_rdf(obj, SEMREPI, store, defaultGraph)

        # serializes results
        logger.info(f"Serializing results to {args.output}")
        ConjunctiveGraph(store).serialize(destination=args.output,
                                          format=args.format)
        logger.info("DONE")

    # Top-level CLI boundary: every failure (including the validation errors
    # raised above) is reported as a single log line instead of a traceback.
    except Exception as e:
        logger.error(e)