Beispiel #1
0
def load_context(url):
    """
    Document loader that prefers locally known contexts over fetching.

    Lookup order: the ``_CONTEXT_CACHE`` mapping, then
    ``BUILTIN_CONTEXTS``, then the ``("context_url_data", url)`` hook, and
    finally ``jsonld.load_document``.  Whatever was resolved is stored in
    ``_CONTEXT_CACHE`` under ``url`` before it is returned.
    """
    # Fast path: this url was already resolved earlier
    if url in _CONTEXT_CACHE:
        return _CONTEXT_CACHE[url]

    # Built-in contexts first, then whatever the hook can provide
    local_data = BUILTIN_CONTEXTS.get(url, None)
    if local_data is None:
        local_data = hook_handle(("context_url_data", url))

    if local_data is None:
        # Nothing known locally; defer to jsonld.load_document
        resolved = jsonld.load_document(url)
    else:
        # Wrap the locally found data in the three-key document dict
        resolved = {
            'contextUrl': None,
            'documentUrl': url,
            'document': local_data,
        }

    # Remember the result for subsequent calls
    _CONTEXT_CACHE[url] = resolved
    return resolved
def load_context(url):
    """
    Resolve ``url`` to a document dict, consulting local sources first.

    A hit in ``_CONTEXT_CACHE`` is returned as-is.  Otherwise the data is
    looked up in ``BUILTIN_CONTEXTS`` and, failing that, requested from
    the ``("context_url_data", url)`` hook.  Data found that way is
    wrapped in a dict with ``contextUrl``, ``documentUrl`` and
    ``document`` keys; if nothing is found, ``jsonld.load_document`` is
    used instead.  The result is cached under ``url`` either way.
    """
    if url in _CONTEXT_CACHE:
        return _CONTEXT_CACHE[url]

    # One of the basic built-in contexts?  If not, ask the hook.
    known = BUILTIN_CONTEXTS.get(url, None)
    if known is None:
        known = hook_handle(("context_url_data", url))

    # Package up local data, or fall back to jsonld.load_document
    if known is not None:
        loaded = dict(contextUrl=None, documentUrl=url, document=known)
    else:
        loaded = jsonld.load_document(url)

    _CONTEXT_CACHE[url] = loaded
    return loaded
def ema_url_from_jsonld(jsonld_filename):
    """Return the ``@id`` of the first ``W3C_HAS_SRC`` entry in a nanopub.

    The document is loaded with ``jsonld.load_document`` from
    ``{NANOPUB_URL}/{jsonld_filename}``.  Every element of its
    ``"document"`` value is scanned, in order, through the items of its
    ``"@graph"`` entry.  Returns ``None`` when no item has the
    ``W3C_HAS_SRC`` key.
    """
    nanopub_url = f"{NANOPUB_URL}/{jsonld_filename}"
    loaded = jsonld.load_document(nanopub_url)
    # Lazily yield candidate ids so scanning stops at the first match
    source_ids = (
        node[W3C_HAS_SRC][0]['@id']
        for entry in loaded["document"]
        for node in entry["@graph"]
        if W3C_HAS_SRC in node
    )
    return next(source_ids, None)
Beispiel #4
0
 def loader(url):
     """Resolve ``url`` from ``_url_map``, optionally loading unknown URLs.

     Unknown URLs are loaded with ``jsonld.load_document`` only when
     ``load_unknown_urls`` is truthy; otherwise ``jsonld.JsonLdError`` is
     raised.  Loaded documents are stored in ``_url_map`` when
     ``cache_externally_loaded`` is truthy.
     """
     # Already known: hand back the stored document
     if url in _url_map:
         return _url_map[url]

     # Unknown, and we were told not to go looking
     if not load_unknown_urls:
         raise jsonld.JsonLdError(
             "url not found and loader set to not load unknown URLs.",
             {'url': url})

     remote = jsonld.load_document(url)
     payload = remote["document"]
     # @@: Is this optimization safe in all cases?
     # A string payload is parsed as JSON once, here.
     if isinstance(payload, str):
         remote["document"] = json.loads(payload)
     if cache_externally_loaded:
         _url_map[url] = remote
     return remote
def cached_load_document(url):
    """Read the local cached copy of ``url`` if available, else delegate.

    ``in_cache(url)`` supplies the path of the cached copy, or ``None``
    when there is none; in that case the result of
    ``jsonld.load_document(url)`` is returned unchanged.

    Returns:
        For a cached copy, a dict with ``contextUrl`` and ``documentUrl``
        set to ``None`` and ``document`` set to the file's text content.
    """
    filepath = in_cache(url)
    if filepath is None:
        # Lazy %-args: formatting is done by logging, and only if emitted
        logging.debug("Using default loader to get %s", url)
        return jsonld.load_document(url)

    logging.debug("Reading %s from %s", url, filepath)
    # Context manager so the handle is closed even if read() raises
    with open(filepath, 'r') as cache_file:
        data = cache_file.read()
    return {
        'contextUrl': None,
        'documentUrl': None,
        'document': data,
    }
Beispiel #6
0
 def loader(url):
     """Return the document for ``url``, preferring entries in ``_url_map``.

     When ``url`` is not in ``_url_map`` and ``load_unknown_urls`` is
     truthy, the document comes from ``jsonld.load_document``; it is added
     to ``_url_map`` if ``cache_externally_loaded`` is truthy.  When
     loading unknown URLs is disabled, ``jsonld.JsonLdError`` is raised.
     """
     if url in _url_map:
         return _url_map[url]

     if load_unknown_urls:
         fetched = jsonld.load_document(url)
         # @@: Is this optimization safe in all cases?
         # Replace a string document with its parsed JSON form.
         if isinstance(fetched["document"], str):
             fetched["document"] = json.loads(fetched["document"])
         if cache_externally_loaded:
             _url_map[url] = fetched
         return fetched

     raise jsonld.JsonLdError(
         "url not found and loader set to not load unknown URLs.",
         {'url': url})
def load_document(url):
    """Retrieve JSON-LD for the given URL from a local file if available.

    The two ``AnnotationWriter`` context URLs are mapped to files in the
    ``jsonld`` directory next to this module.  Any other URL is delegated
    to ``jsonld.load_document``.

    Returns:
        For a bundled context, a dict with ``contextUrl`` set to ``None``,
        ``documentUrl`` set to ``url`` and ``document`` set to the file's
        text; otherwise whatever ``jsonld.load_document`` returns.
    """
    files = {
        AnnotationWriter.JSONLD_CONTEXT: "anno.jsonld",
        AnnotationWriter.LDP_CONTEXT: "ldp.jsonld",
    }
    if url not in files:
        return jsonld.load_document(url)

    base_path = os.path.join(os.path.split(__file__)[0], "jsonld")
    jsonld_file = os.path.join(base_path, files[url])
    # Context manager so the handle is closed promptly, even on error
    with open(jsonld_file) as handle:
        data = handle.read()
    return {"contextUrl": None, "documentUrl": url, "document": data}
Beispiel #8
0
    def lookup_by_identifier(self, identifier, processed_uris=set()):
        """Turn an Identifier into a JSON-LD document.

        The URL is built from ``WORK_BASE_URL`` (OCLC work identifiers) or
        ``BASE_URL`` (OCLC numbers).  A URL already present in
        ``processed_uris`` is skipped with ``(None, True)``; otherwise it
        is added to ``processed_uris`` and loaded.  A failure in
        ``jsonld.load_document`` is logged and yields ``(None, False)``.

        NOTE(review): ``processed_uris`` defaults to a single set created
        at definition time, so calls relying on the default share it --
        confirm that this is intended.
        NOTE(review): the visible body ends after the load attempt and
        returns nothing on the success path -- confirm against the full
        source.
        """
        # NOTE(review): any other identifier type leaves ``url`` and
        # ``foreign_type`` unbound, so the formatting below would fail.
        if identifier.type == Identifier.OCLC_WORK:
            foreign_type = 'work'
            url = self.WORK_BASE_URL
        elif identifier.type == Identifier.OCLC_NUMBER:
            foreign_type = "oclc"
            url = self.BASE_URL

        url = url % dict(id=identifier.identifier, type=foreign_type)
        if url in processed_uris:
            self.log.debug("SKIPPING %s, already processed.", url)
            return None, True
        processed_uris.add(url)
        representation, cached = Representation.get(self._db, url)
        try:
            data = jsonld.load_document(url)
        # "except X as e" is valid on Python 2.6+ and Python 3; the old
        # "except X, e" spelling is a SyntaxError on Python 3.
        except Exception as e:
            self.log.error("EXCEPTION on %s: %s", url, e, exc_info=e)
            return None, False
def load_document(url):
    """Retrieve JSON-LD for the given URL from a local file if available.

    The two ``AnnotationWriter`` context URLs are mapped to files in the
    ``jsonld`` directory next to this module.  Any other URL is delegated
    to ``jsonld.load_document``.

    Returns:
        For a bundled context, a dict with ``contextUrl`` set to ``None``,
        ``documentUrl`` set to ``url`` and ``document`` set to the file's
        content decoded as UTF-8; otherwise whatever
        ``jsonld.load_document`` returns.
    """
    files = {
        AnnotationWriter.JSONLD_CONTEXT: "anno.jsonld",
        AnnotationWriter.LDP_CONTEXT: "ldp.jsonld"
    }
    if url not in files:
        return jsonld.load_document(url)

    base_path = os.path.join(os.path.split(__file__)[0], 'jsonld')
    jsonld_file = os.path.join(base_path, files[url])
    # Read bytes explicitly: a text-mode read returns ``str`` on Python 3,
    # which has no ``decode``.  Binary mode works on Python 2 and 3, and
    # the context manager closes the handle.
    with open(jsonld_file, 'rb') as handle:
        data = handle.read()
    return {
        "contextUrl": None,
        "documentUrl": url,
        "document": data.decode('utf-8')
    }
    def get_jsonld(self, url):
        """Build a JSON-LD document dict for ``url`` from its Representation.

        Returns ``(doc, cached)`` where ``doc`` has ``contextUrl``,
        ``documentUrl`` and ``document`` keys, the last one being the
        representation content decoded as UTF-8.  Returns ``(None, False)``
        when ``jsonld.load_document`` raises or no content is available.
        """
        representation, cached = Representation.get(self._db, url)
        try:
            # The loaded value itself is not used; only a failure matters.
            jsonld.load_document(url)
        except Exception as err:
            self.log.error("EXCEPTION on %s: %s", url, err, exc_info=err)
            return None, False

        content = representation.content
        if cached and not content:
            # Cached but empty: ask again with max_age=0
            # (presumably this bypasses the cache -- confirm).
            representation, cached = Representation.get(
                self._db, url, max_age=0)
            content = representation.content

        if not content:
            return None, False

        return {
            'contextUrl': None,
            'documentUrl': url,
            'document': content.decode('utf8'),
        }, cached
Beispiel #11
0
    def get_jsonld(self, url):
        """Return ``(doc, cached)`` for ``url``, or ``(None, False)``.

        ``doc`` wraps the UTF-8 decoded content of the Representation for
        ``url`` in a dict with ``contextUrl``, ``documentUrl`` and
        ``document`` keys.  ``(None, False)`` is returned when
        ``jsonld.load_document`` raises or when the representation has no
        content.
        """
        representation, cached = Representation.get(self._db, url)
        try:
            # Only the success or failure of this call is used below.
            jsonld.load_document(url)
        except Exception as exc:
            self.log.error("EXCEPTION on %s: %s", url, exc, exc_info=exc)
            return None, False

        # A cached representation without content is requested again
        # with max_age=0.
        needs_refresh = cached and not representation.content
        if needs_refresh:
            representation, cached = Representation.get(
                self._db, url, max_age=0)

        raw = representation.content
        if not raw:
            return None, False

        wrapped = dict(
            contextUrl=None,
            documentUrl=url,
            document=raw.decode('utf8'),
        )
        return wrapped, cached
Beispiel #12
0
def _cached_load_document(url):
    """Loader of pyld document from a url, which caches loaded instance on disk

    The cache file name comes from ``_get_schema_url_cache_filename(url)``.
    If that file exists and can be unpickled, its content is returned.
    Otherwise the document is loaded with ``pyld.jsonld.load_document``,
    pickled to the cache file, and returned.
    """
    doc_fname = _get_schema_url_cache_filename(url)

    doc = None
    if os.path.exists(doc_fname):
        try:
            lgr.debug("use cached request result to '%s' from %s", url, doc_fname)
            # NOTE: unpickling can execute arbitrary code; this is only
            # acceptable if the cache location is trusted.
            with open(doc_fname, 'rb') as cache_file:
                doc = pickle.load(cache_file)
        except Exception as e:  # it is OK to ignore any error and fall back on the true source
            lgr.warning(
                "cannot load cache from '%s', fall back on schema download: %s",
                doc_fname, exc_str(e))

    if doc is None:
        from pyld.jsonld import load_document
        doc = load_document(url)
        assure_dir(dirname(doc_fname))
        # use pickle to store the entire request result dict; the context
        # manager guarantees the data is flushed and the handle closed
        with open(doc_fname, 'wb') as cache_file:
            pickle.dump(doc, cache_file)
        lgr.debug("stored result of request to '{}' in {}".format(url, doc_fname))
    return doc
Beispiel #13
0
    def _fetch_context(self, active_ctx, url, cycles):
        """Load the context at ``url`` and normalise it to ``{'@context': ...}``.

        ``url`` is added to ``cycles`` before loading.  The document is
        obtained through ``jsonld.load_document`` using
        ``self.document_loader``.

        Returns:
            ``(context, remote_doc)``: ``context`` is a dict with a single
            ``'@context'`` key (empty dict if the loaded document has
            none, with the remote ``contextUrl`` appended when set), and
            ``remote_doc`` is the loader's result.

        Raises:
            jsonld.JsonLdError: if ``cycles`` holds more than
                ``MAX_CONTEXT_URLS`` entries, if ``url`` is already in
                ``cycles``, if loading fails, or if the loaded document is
                neither a ``dict`` nor a ``frozendict``.
        """
        # check for max context URLs fetched during a resolve operation
        if len(cycles) > MAX_CONTEXT_URLS:
            if active_ctx.get('processingMode') == 'json-ld-1.0':
                overflow_code = 'loading remote context failed'
            else:
                overflow_code = 'context overflow'
            raise jsonld.JsonLdError(
                'Maximum number of @context URLs exceeded.',
                'jsonld.ContextUrlError', {'max': MAX_CONTEXT_URLS},
                code=overflow_code)

        # check for context URL cycle
        # shortcut to avoid extra work that would eventually hit the max above
        if url in cycles:
            if active_ctx.get('processingMode') == 'json-ld-1.0':
                cycle_code = 'recursive context inclusion'
            else:
                cycle_code = 'context overflow'
            raise jsonld.JsonLdError(
                'Cyclical @context URLs detected.',
                'jsonld.ContextUrlError', {'url': url},
                code=cycle_code)

        # track cycles
        cycles.add(url)

        try:
            loader_options = {'documentLoader': self.document_loader}
            remote_doc = jsonld.load_document(
                url,
                loader_options,
                requestProfile='http://www.w3.org/ns/json-ld#context')
            # fall back to the url itself when no 'document' key is present
            context = remote_doc.get('document', url)
        except Exception as cause:
            raise jsonld.JsonLdError(
                'Dereferencing a URL did not result in a valid JSON-LD object. '
                'Possible causes are an inaccessible URL perhaps due to '
                'a same-origin policy (ensure the server uses CORS if you are '
                'using client-side JavaScript), too many redirects, a '
                'non-JSON response, or more than one HTTP Link Header was '
                'provided for a remote context.',
                'jsonld.InvalidUrl',
                {'url': url, 'cause': cause},
                code='loading remote context failed')

        # ensure ctx is an object
        if not isinstance(context, (dict, frozendict)):
            raise jsonld.JsonLdError(
                'Dereferencing a URL did not result in a JSON object. The '
                'response was valid JSON, but it was not a JSON object.',
                'jsonld.InvalidUrl',
                {'url': url},
                code='invalid remote context')

        # use empty context if no @context key is present
        inner = context['@context'] if '@context' in context else {}
        context = {'@context': inner}

        # append @context URL to context if given
        linked_url = remote_doc['contextUrl']
        if linked_url:
            if not isinstance(context['@context'], list):
                context['@context'] = [context['@context']]
            context['@context'].append(linked_url)

        return context, remote_doc