コード例 #1
0
    def __init__(
            self,
            fetcher=None,  # type: Optional[Fetcher]
            namespaces=None,  # type: Optional[Dict[Text, Text]]
            fileuri=None,  # type: Optional[Text]
            copyfrom=None,  # type: Optional[LoadingOptions]
            schemas=None,  # type: Optional[List[Text]]
            original_doc=None,  # type: Optional[Any]
    ):  # type: (...) -> None
        """Create a LoadingOptions object, optionally inheriting from `copyfrom`."""
        self.idx = {}  # type: Dict[Text, Text]
        self.fileuri = fileuri  # type: Optional[Text]
        self.namespaces = namespaces
        self.schemas = schemas
        self.original_doc = original_doc
        if copyfrom is not None:
            # Inherit any setting the caller did not override explicitly.
            self.idx = copyfrom.idx
            if fetcher is None:
                fetcher = copyfrom.fetcher
            if fileuri is None:
                self.fileuri = copyfrom.fileuri
            if namespaces is None:
                self.namespaces = copyfrom.namespaces
            # BUG FIX: the original re-tested `namespaces is None` here and
            # assigned the local `schemas` (a dead store), so schemas were
            # never inherited from `copyfrom`.
            if schemas is None:
                self.schemas = copyfrom.schemas

        if fetcher is None:
            import requests
            from cachecontrol.wrapper import CacheControl
            from cachecontrol.caches import FileCache
            from schema_salad.ref_resolver import DefaultFetcher

            # Pick a writable directory for the on-disk HTTP cache.
            if "HOME" in os.environ:
                session = CacheControl(
                    requests.Session(),
                    cache=FileCache(
                        os.path.join(os.environ["HOME"], ".cache", "salad")),
                )
            elif "TMPDIR" in os.environ:
                session = CacheControl(
                    requests.Session(),
                    cache=FileCache(
                        os.path.join(os.environ["TMPDIR"], ".cache", "salad")),
                )
            else:
                # BUG FIX: FileCache takes a single directory path; the extra
                # positional strings were silently consumed as the `forever`
                # and `filemode` parameters, caching directly under /tmp.
                session = CacheControl(
                    requests.Session(),
                    cache=FileCache(os.path.join("/tmp", ".cache", "salad")))
            self.fetcher = DefaultFetcher({}, session)  # type: Fetcher
        else:
            self.fetcher = fetcher

        self.vocab = _vocab
        self.rvocab = _rvocab

        if namespaces is not None:
            # Copy so the module-level vocab tables are not mutated.
            self.vocab = self.vocab.copy()
            self.rvocab = self.rvocab.copy()
            for k, v in iteritems(namespaces):
                self.vocab[k] = v
                self.rvocab[v] = k
コード例 #2
0
    def __init__(self,
                 fetcher=None,
                 namespaces=None,
                 fileuri=None,
                 copyfrom=None,
                 schemas=None):
        """Create loading options, optionally inheriting from `copyfrom`."""
        if copyfrom is not None:
            self.idx = copyfrom.idx
            if fetcher is None:
                fetcher = copyfrom.fetcher
            if fileuri is None:
                fileuri = copyfrom.fileuri
            if namespaces is None:
                namespaces = copyfrom.namespaces
            # BUG FIX: the original re-tested `namespaces is None` here, so
            # schemas were never inherited from `copyfrom`.
            if schemas is None:
                schemas = copyfrom.schemas
        else:
            self.idx = {}

        if fetcher is None:
            import os
            import requests
            from cachecontrol.wrapper import CacheControl
            from cachecontrol.caches import FileCache
            from schema_salad.ref_resolver import DefaultFetcher
            # Pick a writable directory for the on-disk HTTP cache.
            if "HOME" in os.environ:
                session = CacheControl(requests.Session(),
                                       cache=FileCache(
                                           os.path.join(
                                               os.environ["HOME"], ".cache",
                                               "salad")))
            elif "TMPDIR" in os.environ:
                session = CacheControl(requests.Session(),
                                       cache=FileCache(
                                           os.path.join(
                                               os.environ["TMPDIR"], ".cache",
                                               "salad")))
            else:
                # BUG FIX: FileCache takes a single directory path; the extra
                # positional strings were silently consumed as the `forever`
                # and `filemode` parameters, caching directly under /tmp.
                session = CacheControl(requests.Session(),
                                       cache=FileCache(
                                           os.path.join("/tmp", ".cache",
                                                        "salad")))
            self.fetcher = DefaultFetcher({}, session)
        else:
            self.fetcher = fetcher

        self.fileuri = fileuri

        self.vocab = _vocab
        self.rvocab = _rvocab
        self.namespaces = namespaces
        self.schemas = schemas

        if namespaces is not None:
            # Copy so the module-level vocab tables are not mutated.
            self.vocab = self.vocab.copy()
            self.rvocab = self.rvocab.copy()
            for k, v in iteritems(namespaces):
                self.vocab[k] = v
                self.rvocab[v] = k
コード例 #3
0
    def __init__(self,
                 ctx,
                 schemagraph=None,
                 foreign_properties=None,
                 idx=None,
                 cache=None,
                 session=None):
        # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any], requests.sessions.Session) -> None
        """Initialize loader state and register the supplied context."""
        def normalize(url):
            return urlparse.urlsplit(url).geturl()

        self.idx = idx if idx is not None else NormDict(normalize)

        self.ctx = {}  # type: Loader.ContextType
        self.graph = (schemagraph if schemagraph is not None
                      else rdflib.graph.Graph())
        self.foreign_properties = (foreign_properties
                                   if foreign_properties is not None
                                   else set())
        self.cache = cache if cache is not None else {}

        self.session = None  # type: requests.sessions.Session
        if session is not None:
            self.session = session
        else:
            # NOTE(review): assumes HOME is set in the environment; a missing
            # HOME raises KeyError here — confirm against callers.
            salad_cache = os.path.join(os.environ["HOME"], ".cache", "salad")
            self.session = CacheControl(requests.Session(),
                                        cache=FileCache(salad_cache))

        # The fields below are populated by add_context().
        self.url_fields = None  # type: Set[unicode]
        self.scoped_ref_fields = None  # type: Dict[unicode, int]
        self.vocab_fields = None  # type: Set[unicode]
        self.identifiers = None  # type: Set[unicode]
        self.identity_links = None  # type: Set[unicode]
        self.standalone = None  # type: Set[unicode]
        self.nolinkcheck = None  # type: Set[unicode]
        self.vocab = {}  # type: Dict[unicode, unicode]
        self.rvocab = {}  # type: Dict[unicode, unicode]
        self.idmap = None  # type: Dict[unicode, Any]
        self.mapPredicate = None  # type: Dict[unicode, unicode]
        self.type_dsl_fields = None  # type: Set[unicode]

        self.add_context(ctx)
コード例 #4
0
def test_cache_control_sets_sort_query():
    """CacheControl must propagate sort_query to adapter and controller."""
    for flag in (True, False):
        sess = CacheControl(Session(),
                            mock.Mock(spec=DictCache),
                            sort_query=flag)
        adapter = sess.adapters['http://']
        assert adapter.sort_query == flag
        assert adapter.controller.sort_query == flag
コード例 #5
0
    def __init__(
        self,
        fetcher: Optional[Fetcher] = None,
        namespaces: Optional[Dict[str, str]] = None,
        schemas: Optional[Dict[str, str]] = None,
        fileuri: Optional[str] = None,
        copyfrom: Optional["LoadingOptions"] = None,
        original_doc: Optional[Any] = None,
    ) -> None:
        """Create a LoadingOptions object."""
        self.idx: Dict[str, Dict[str, Any]] = {}
        self.fileuri: Optional[str] = fileuri
        self.namespaces = namespaces
        self.schemas = schemas
        self.original_doc = original_doc
        if copyfrom is not None:
            # Inherit anything the caller did not override explicitly.
            self.idx = copyfrom.idx
            if fetcher is None:
                fetcher = copyfrom.fetcher
            if fileuri is None:
                self.fileuri = copyfrom.fileuri
            if namespaces is None:
                self.namespaces = copyfrom.namespaces
            if schemas is None:
                self.schemas = copyfrom.schemas

        if fetcher is not None:
            self.fetcher = fetcher
        else:
            import requests
            from cachecontrol.caches import FileCache
            from cachecontrol.wrapper import CacheControl

            # Cache under $HOME when available, otherwise the system tmp dir.
            root = pathlib.Path(os.environ.get("HOME", tempfile.gettempdir()))
            cached_session = CacheControl(
                requests.Session(),
                cache=FileCache(root / ".cache" / "salad"),
            )
            self.fetcher: Fetcher = DefaultFetcher({}, cached_session)

        self.vocab = _vocab
        self.rvocab = _rvocab

        if namespaces is not None:
            # Copy so the module-level vocab tables are not mutated.
            self.vocab = self.vocab.copy()
            self.rvocab = self.rvocab.copy()
            for prefix, uri in namespaces.items():
                self.vocab[prefix] = uri
                self.rvocab[uri] = prefix
コード例 #6
0
    def __init__(
            self,
            fetcher=None,  # type: Optional[Fetcher]
            namespaces=None,  # type: Optional[Dict[str, str]]
            schemas=None,  # type: Optional[Dict[str, str]]
            fileuri=None,  # type: Optional[str]
            copyfrom=None,  # type: Optional[LoadingOptions]
            original_doc=None,  # type: Optional[Any]
    ):  # type: (...) -> None
        """Create a LoadingOptions object, optionally inheriting from `copyfrom`."""
        self.idx = {}  # type: Dict[str, Dict[str, Any]]
        self.fileuri = fileuri  # type: Optional[str]
        self.namespaces = namespaces
        self.schemas = schemas
        self.original_doc = original_doc
        if copyfrom is not None:
            # Inherit any setting the caller did not override explicitly.
            self.idx = copyfrom.idx
            if fetcher is None:
                fetcher = copyfrom.fetcher
            if fileuri is None:
                self.fileuri = copyfrom.fileuri
            if namespaces is None:
                self.namespaces = copyfrom.namespaces
            if schemas is None:
                self.schemas = copyfrom.schemas

        if fetcher is None:
            import requests
            from cachecontrol.caches import FileCache
            from cachecontrol.wrapper import CacheControl

            # Cache under $HOME when available, otherwise the system tmp dir.
            root = pathlib.Path(os.environ.get("HOME", tempfile.gettempdir()))
            session = CacheControl(
                requests.Session(),
                cache=FileCache(root / ".cache" / "salad"),
            )
            # CONSISTENCY FIX: this block uses `# type:` comment annotations
            # throughout; the original mixed in an inline variable annotation
            # here, which also broke the Python-2 compatibility the comment
            # style otherwise preserves.
            self.fetcher = DefaultFetcher({}, session)  # type: Fetcher
        else:
            self.fetcher = fetcher

        self.vocab = _vocab
        self.rvocab = _rvocab

        if namespaces is not None:
            # Copy so the module-level vocab tables are not mutated.
            self.vocab = self.vocab.copy()
            self.rvocab = self.rvocab.copy()
            for k, v in namespaces.items():
                self.vocab[k] = v
                self.rvocab[v] = k
コード例 #7
0
    def __init__(self):
        """Build a cached requests session using the configured backend."""
        session = requests.Session()

        cls = self.__class__
        if not cls._cache:
            # Lazily create the shared cache backend on first instantiation.
            if self.backend == "RedisCache":
                pool = redis.ConnectionPool(host=self.redis_host, port=self.redis_port, db=0)
                cls._cache = RedisCache(redis.Redis(connection_pool=pool))
            elif self.backend == "FileCache":
                cls._cache = FileCache(self.file_cache_path)
            else:
                cls._cache = DictCache()

        session = CacheControl(session, heuristic=DefaultHeuristic(self.expire_after), cache=cls._cache)

        super(CachedRemoteResource, self).__init__(session)
コード例 #8
0
    def __init__(self, ctx, schemagraph=None, foreign_properties=None,
                 idx=None, cache=None, session=None):
        # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any], requests.sessions.Session) -> None
        """Initialize loader state and register the supplied context."""
        normalize = lambda url: urlparse.urlsplit(url).geturl()
        self.idx = NormDict(normalize) if idx is None else idx

        self.ctx = {}  # type: Loader.ContextType
        self.graph = rdflib.graph.Graph() if schemagraph is None else schemagraph
        self.foreign_properties = (set() if foreign_properties is None
                                   else foreign_properties)
        self.cache = {} if cache is None else cache

        self.session = None  # type: requests.sessions.Session
        if session is None:
            # NOTE(review): assumes HOME is set in the environment; a missing
            # HOME raises KeyError here — confirm against callers.
            salad_cache = os.path.join(os.environ["HOME"], ".cache", "salad")
            self.session = CacheControl(requests.Session(),
                                        cache=FileCache(salad_cache))
        else:
            self.session = session

        # The fields below are populated by add_context().
        self.url_fields = None  # type: Set[unicode]
        self.scoped_ref_fields = None  # type: Dict[unicode, int]
        self.vocab_fields = None  # type: Set[unicode]
        self.identifiers = None  # type: Set[unicode]
        self.identity_links = None  # type: Set[unicode]
        self.standalone = None  # type: Set[unicode]
        self.nolinkcheck = None  # type: Set[unicode]
        self.vocab = {}  # type: Dict[unicode, unicode]
        self.rvocab = {}  # type: Dict[unicode, unicode]
        self.idmap = None  # type: Dict[unicode, Any]
        self.mapPredicate = None  # type: Dict[unicode, unicode]
        self.type_dsl_fields = None  # type: Set[unicode]

        self.add_context(ctx)
コード例 #9
0
    def __init__(
        self,
        ctx: ContextType,
        schemagraph: Optional[Graph] = None,
        foreign_properties: Optional[Set[str]] = None,
        idx: Optional[IdxType] = None,
        cache: Optional[CacheType] = None,
        session: Optional[requests.sessions.Session] = None,
        fetcher_constructor: Optional[FetcherCallableType] = None,
        skip_schemas: Optional[bool] = None,
        url_fields: Optional[Set[str]] = None,
        allow_attachments: Optional[AttachmentsType] = None,
        doc_cache: Union[str, bool] = True,
    ) -> None:
        """Initialize the Loader and register the supplied context."""
        if idx is None:
            idx = NormDict(lambda url: urllib.parse.urlsplit(url).geturl())
        self.idx = idx  # type: IdxType

        self.ctx = {}  # type: ContextType
        self.graph = Graph() if schemagraph is None else schemagraph
        if foreign_properties is None:
            self.foreign_properties = set()
        else:
            self.foreign_properties = set(foreign_properties)
        self.cache = {} if cache is None else cache
        self.skip_schemas = False if skip_schemas is None else skip_schemas

        # Session selection: an explicit session wins; otherwise doc_cache
        # chooses between no caching, a default cache directory, or a
        # caller-supplied cache directory.
        if session is not None:
            self.session = session
        elif doc_cache is False:
            self.session = requests.Session()
        elif doc_cache is True:
            if "HOME" in os.environ:
                cache_dir = os.path.join(os.environ["HOME"], ".cache", "salad")
            elif "TMP" in os.environ:
                cache_dir = os.path.join(os.environ["TMP"], ".cache", "salad")
            else:
                cache_dir = os.path.join("/tmp", ".cache", "salad")
            self.session = CacheControl(requests.Session(),
                                        cache=FileCache(cache_dir))
        elif isinstance(doc_cache, str):
            self.session = CacheControl(requests.Session(),
                                        cache=FileCache(doc_cache))

        if fetcher_constructor is None:
            fetcher_constructor = DefaultFetcher
        self.fetcher_constructor = fetcher_constructor
        self.fetcher = self.fetcher_constructor(self.cache, self.session)
        self.fetch_text = self.fetcher.fetch_text
        self.check_exists = self.fetcher.check_exists

        self.url_fields = (set() if url_fields is None
                           else set(url_fields))  # type: Set[str]
        # The remaining fields are (re)populated by add_context().
        self.scoped_ref_fields = {}  # type: Dict[str, int]
        self.vocab_fields = set()  # type: Set[str]
        self.identifiers = []  # type: List[str]
        self.identity_links = set()  # type: Set[str]
        self.standalone = None  # type: Optional[Set[str]]
        self.nolinkcheck = set()  # type: Set[str]
        self.vocab = {}  # type: Dict[str, str]
        self.rvocab = {}  # type: Dict[str, str]
        self.idmap = {}  # type: Dict[str, str]
        self.mapPredicate = {}  # type: Dict[str, str]
        self.type_dsl_fields = set()  # type: Set[str]
        self.subscopes = {}  # type:  Dict[str, str]
        self.secondaryFile_dsl_fields = set()  # type: Set[str]
        self.allow_attachments = allow_attachments

        self.add_context(ctx)
    def __init__(
        self,
        ctx,  # type: ContextType
        schemagraph=None,  # type: rdflib.graph.Graph
        foreign_properties=None,  # type: Set[Text]
        idx=None,  # type: Dict[Text, Union[CommentedMap, CommentedSeq, Text, None]]
        cache=None,  # type: Dict[Text, Any]
        session=None,  # type: requests.sessions.Session
        fetcher_constructor=None,  # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
        skip_schemas=None  # type: bool
    ):
        # type: (...) -> None
        """Initialize the Loader and register the supplied context."""
        normalize = lambda url: urllib.parse.urlsplit(url).geturl()
        if idx is not None:
            self.idx = idx
        else:
            self.idx = NormDict(normalize)

        self.ctx = {}  # type: ContextType
        if schemagraph is not None:
            self.graph = schemagraph
        else:
            self.graph = rdflib.graph.Graph()

        if foreign_properties is not None:
            self.foreign_properties = foreign_properties
        else:
            self.foreign_properties = set()

        if cache is not None:
            self.cache = cache
        else:
            self.cache = {}

        if skip_schemas is not None:
            self.skip_schemas = skip_schemas
        else:
            self.skip_schemas = False

        if session is None:
            # Pick a writable directory for the on-disk HTTP cache.
            if "HOME" in os.environ:
                self.session = CacheControl(requests.Session(),
                                            cache=FileCache(
                                                os.path.join(
                                                    os.environ["HOME"],
                                                    ".cache", "salad")))
            elif "TMP" in os.environ:
                self.session = CacheControl(requests.Session(),
                                            cache=FileCache(
                                                os.path.join(
                                                    os.environ["TMP"],
                                                    ".cache", "salad")))
            else:
                # BUG FIX: FileCache takes a single directory path; the extra
                # positional strings were silently consumed as the `forever`
                # and `filemode` parameters, caching directly under /tmp.
                self.session = CacheControl(requests.Session(),
                                            cache=FileCache(
                                                os.path.join(
                                                    "/tmp", ".cache",
                                                    "salad")))
        else:
            self.session = session

        if fetcher_constructor is not None:
            self.fetcher_constructor = fetcher_constructor
        else:
            self.fetcher_constructor = DefaultFetcher
        self.fetcher = self.fetcher_constructor(self.cache, self.session)

        self.fetch_text = self.fetcher.fetch_text
        self.check_exists = self.fetcher.check_exists

        # The fields below are (re)populated by add_context().
        self.url_fields = set()  # type: Set[Text]
        self.scoped_ref_fields = {}  # type: Dict[Text, int]
        self.vocab_fields = set()  # type: Set[Text]
        self.identifiers = []  # type: List[Text]
        self.identity_links = set()  # type: Set[Text]
        self.standalone = None  # type: Optional[Set[Text]]
        self.nolinkcheck = set()  # type: Set[Text]
        self.vocab = {}  # type: Dict[Text, Text]
        self.rvocab = {}  # type: Dict[Text, Text]
        self.idmap = {}  # type: Dict[Text, Any]
        self.mapPredicate = {}  # type: Dict[Text, Text]
        self.type_dsl_fields = set()  # type: Set[Text]

        self.add_context(ctx)
コード例 #11
0
class Loader(object):

    ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]]
    DocumentType = TypeVar('DocumentType', List, Dict[unicode, Any])

    def __init__(self,
                 ctx,
                 schemagraph=None,
                 foreign_properties=None,
                 idx=None,
                 cache=None,
                 session=None):
        # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any], requests.sessions.Session) -> None
        """Set up loader state and apply the initial context."""
        self.idx = (idx if idx is not None
                    else NormDict(lambda url: urlparse.urlsplit(url).geturl()))

        self.ctx = {}  # type: Loader.ContextType
        if schemagraph is None:
            self.graph = rdflib.graph.Graph()
        else:
            self.graph = schemagraph

        if foreign_properties is None:
            self.foreign_properties = set()
        else:
            self.foreign_properties = foreign_properties

        if cache is None:
            self.cache = {}
        else:
            self.cache = cache

        self.session = None  # type: requests.sessions.Session
        if session is not None:
            self.session = session
        else:
            # NOTE(review): assumes HOME is set in the environment; a missing
            # HOME raises KeyError here — confirm against callers.
            cache_dir = os.path.join(os.environ["HOME"], ".cache", "salad")
            self.session = CacheControl(requests.Session(),
                                        cache=FileCache(cache_dir))

        # The fields below are populated by add_context().
        self.url_fields = None  # type: Set[unicode]
        self.scoped_ref_fields = None  # type: Dict[unicode, int]
        self.vocab_fields = None  # type: Set[unicode]
        self.identifiers = None  # type: Set[unicode]
        self.identity_links = None  # type: Set[unicode]
        self.standalone = None  # type: Set[unicode]
        self.nolinkcheck = None  # type: Set[unicode]
        self.vocab = {}  # type: Dict[unicode, unicode]
        self.rvocab = {}  # type: Dict[unicode, unicode]
        self.idmap = None  # type: Dict[unicode, Any]
        self.mapPredicate = None  # type: Dict[unicode, unicode]
        self.type_dsl_fields = None  # type: Set[unicode]

        self.add_context(ctx)

    def expand_url(self,
                   url,
                   base_url,
                   scoped_id=False,
                   vocab_term=False,
                   scoped_ref=None):
        # type: (unicode, unicode, bool, bool, int) -> unicode
        """Resolve `url` against `base_url`, honoring vocabulary prefixes."""
        # JSON-LD keywords and known vocabulary terms pass through unchanged.
        if url in (u"@id", u"@type"):
            return url
        if vocab_term and url in self.vocab:
            return url

        # Expand a known "prefix:rest" form using the vocabulary.
        if self.vocab and u":" in url:
            prefix, _, rest = url.partition(u":")
            if prefix in self.vocab:
                url = self.vocab[prefix] + rest

        split = urlparse.urlsplit(url)

        if split.scheme or url.startswith(u"$(") or url.startswith(u"${"):
            # Already absolute, or a parameter reference — leave untouched.
            pass
        elif scoped_id and not split.fragment:
            # Scoped identifier: nest under the base URL's fragment.
            splitbase = urlparse.urlsplit(base_url)
            if splitbase.fragment:
                frg = splitbase.fragment + u"/" + split.path
            else:
                frg = split.path
            pt = splitbase.path if splitbase.path else "/"
            url = urlparse.urlunsplit(
                (splitbase.scheme, splitbase.netloc, pt, splitbase.query, frg))
        elif scoped_ref is not None and not split.fragment:
            # Scoped references are resolved elsewhere.
            pass
        else:
            url = urlparse.urljoin(base_url, url)

        if vocab_term and url in self.rvocab:
            return self.rvocab[url]
        return url

    def _add_properties(self, s):  # type: (unicode) -> None
        """Record `s` as a foreign property; also mark it as a URL field unless every declared range is a literal type."""
        for _, _, rng in self.graph.triples((s, RDFS.range, None)):
            rng_str = unicode(rng)
            is_literal = (
                (rng_str.startswith(u"http://www.w3.org/2001/XMLSchema#")
                 and not rng_str == u"http://www.w3.org/2001/XMLSchema#anyURI")
                or rng_str == u"http://www.w3.org/2000/01/rdf-schema#Literal")
            if not is_literal:
                self.url_fields.add(unicode(s))
        self.foreign_properties.add(unicode(s))

    def add_namespaces(self, ns):  # type: (Dict[unicode, unicode]) -> None
        """Merge the namespace mapping `ns` into the active vocabulary."""
        self.vocab.update(ns)

    def add_schemas(self, ns, base_url):
        # type: (Union[List[unicode], unicode], unicode) -> None
        """Fetch and parse external schemas, then index their properties."""
        for sch in aslist(ns):
            fetchurl = urlparse.urljoin(base_url, sch)
            if fetchurl in self.cache:
                continue
            _logger.info("Getting external schema %s", fetchurl)
            content = self.fetch_text(fetchurl)
            self.cache[fetchurl] = rdflib.graph.Graph()
            # Try each serialization format until one parses.
            for fmt in ['xml', 'turtle', 'rdfa']:
                try:
                    self.cache[fetchurl].parse(data=content, format=fmt)
                except xml.sax.SAXParseException:  # type: ignore
                    continue
                except TypeError:
                    continue
                except BadSyntax:
                    continue
                self.graph += self.cache[fetchurl]
                break

        # Register every property declared in the merged graph.
        for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
            self._add_properties(s)
        for s, _, o in self.graph.triples((None, RDFS.subPropertyOf, None)):
            self._add_properties(s)
            self._add_properties(o)
        for s, _, _ in self.graph.triples((None, RDFS.range, None)):
            self._add_properties(s)
        for s, _, _ in self.graph.triples(
                (None, RDF.type, OWL.ObjectProperty)):
            self._add_properties(s)

        # Mark every subject as known (but unresolved) in the index.
        for s, _, _ in self.graph.triples((None, None, None)):
            self.idx[unicode(s)] = None

    def add_context(self, newcontext, baseuri=""):
        # type: (Loader.ContextType, unicode) -> None
        """Install `newcontext`, rebuilding the vocabulary and field sets."""
        if self.vocab:
            raise validate.ValidationException(
                "Refreshing context that already has stuff in it")

        # Reset all derived state before repopulating it.
        self.url_fields = set()
        self.scoped_ref_fields = {}
        self.vocab_fields = set()
        self.identifiers = set()
        self.identity_links = set()
        self.standalone = set()
        self.nolinkcheck = set()
        self.idmap = {}
        self.mapPredicate = {}
        self.vocab = {}
        self.rvocab = {}
        self.type_dsl_fields = set()

        self.ctx.update(_copy_dict_without_key(newcontext, u"@context"))

        _logger.debug("ctx is %s", self.ctx)

        for key, value in self.ctx.items():
            value_is_dict = isinstance(value, dict)
            if value == u"@id":
                self.identifiers.add(key)
                self.identity_links.add(key)
            elif value_is_dict and value.get(u"@type") == u"@id":
                self.url_fields.add(key)
                if u"refScope" in value:
                    self.scoped_ref_fields[key] = value[u"refScope"]
                if value.get(u"identity", False):
                    self.identity_links.add(key)
            elif value_is_dict and value.get(u"@type") == u"@vocab":
                self.url_fields.add(key)
                self.vocab_fields.add(key)
                if u"refScope" in value:
                    self.scoped_ref_fields[key] = value[u"refScope"]
                if value.get(u"typeDSL"):
                    self.type_dsl_fields.add(key)

            if value_is_dict:
                if value.get(u"noLinkCheck"):
                    self.nolinkcheck.add(key)
                if value.get(u"mapSubject"):
                    self.idmap[key] = value[u"mapSubject"]
                if value.get(u"mapPredicate"):
                    self.mapPredicate[key] = value[u"mapPredicate"]
                if u"@id" in value:
                    self.vocab[key] = value[u"@id"]
            elif isinstance(value, basestring):
                # Plain string values map the term directly to a URI.
                self.vocab[key] = value

        # Build the reverse vocabulary from fully-expanded URIs.
        for k, v in self.vocab.items():
            self.rvocab[self.expand_url(v, u"", scoped_id=False)] = k

        _logger.debug("identifiers is %s", self.identifiers)
        _logger.debug("identity_links is %s", self.identity_links)
        _logger.debug("url_fields is %s", self.url_fields)
        _logger.debug("vocab_fields is %s", self.vocab_fields)
        _logger.debug("vocab is %s", self.vocab)

    def resolve_ref(self, ref, base_url=None, checklinks=True):
        # type: (Union[Dict[unicode, Any], unicode], unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]]
        """Resolve a reference (a URL string or a dict containing a directive
        or identifier field) to its document, returning (resolved, metadata).

        Handles the "$import", "$include" and "$mixin" directives; results
        are cached in self.idx.
        """
        base_url = base_url or u'file://%s/' % os.path.abspath('.')

        obj = None  # type: Dict[unicode, Any]
        inc = False
        mixin = None

        # If `ref` is a dict, look for special directives.
        if isinstance(ref, dict):
            obj = ref
            if u"$import" in obj:
                if len(obj) == 1:
                    ref = obj[u"$import"]
                    obj = None
                else:
                    raise ValueError(
                        u"'$import' must be the only field in %s" % (str(obj)))
            elif u"$include" in obj:
                if len(obj) == 1:
                    ref = obj[u"$include"]
                    inc = True
                    obj = None
                else:
                    raise ValueError(
                        u"'$include' must be the only field in %s" %
                        (str(obj)))
            elif u"$mixin" in obj:
                ref = obj[u"$mixin"]
                mixin = obj
                obj = None
            else:
                # No directive: resolve via the object's identifier field.
                ref = None
                for identifier in self.identifiers:
                    if identifier in obj:
                        ref = obj[identifier]
                        break
                if not ref:
                    raise ValueError(
                        u"Object `%s` does not have identifier field in %s" %
                        (obj, self.identifiers))

        if not isinstance(ref, (str, unicode)):
            raise ValueError(u"Must be string: `%s`" % str(ref))

        url = self.expand_url(ref, base_url, scoped_id=(obj is not None))

        # Has this reference been loaded already?
        # ($mixin must bypass the cache so the overlay is re-applied.)
        if url in self.idx and (not mixin):
            return self.idx[url], {}

        # "$include" directive means load raw text
        if inc:
            return self.fetch_text(url), {}

        doc = None
        if obj:
            # Inline object: rewrite its identifier fields to the expanded URL.
            for identifier in self.identifiers:
                obj[identifier] = url
            doc_url = url
        else:
            # Load structured document
            doc_url, frg = urlparse.urldefrag(url)
            if doc_url in self.idx and (not mixin):
                # If the base document is in the index, it was already loaded,
                # so if we didn't find the reference earlier then it must not
                # exist.
                raise validate.ValidationException(
                    u"Reference `#%s` not found in file `%s`." %
                    (frg, doc_url))
            doc = self.fetch(doc_url, inject_ids=(not mixin))

        # Recursively expand urls and resolve directives
        if mixin:
            # Overlay the $mixin fields on a deep copy of the fetched doc,
            # then drop the directive before resolving.
            doc = copy.deepcopy(doc)
            doc.update(mixin)
            del doc["$mixin"]
            url = None
            resolved_obj, metadata = self.resolve_all(doc,
                                                      base_url,
                                                      file_base=doc_url,
                                                      checklinks=checklinks)
        else:
            resolved_obj, metadata = self.resolve_all(doc if doc else obj,
                                                      doc_url,
                                                      checklinks=checklinks)

        # Requested reference should be in the index now, otherwise it's a bad
        # reference
        if url is not None:
            if url in self.idx:
                resolved_obj = self.idx[url]
            else:
                raise RuntimeError("Reference `%s` is not in the index. "
                                   "Index contains:\n  %s" %
                                   (url, "\n  ".join(self.idx)))

        if isinstance(resolved_obj, (dict)):
            if u"$graph" in resolved_obj:
                # Split the $graph payload from the surrounding metadata.
                metadata = _copy_dict_without_key(resolved_obj, u"$graph")
                return resolved_obj[u"$graph"], metadata
            else:
                return resolved_obj, metadata
        else:
            return resolved_obj, metadata

    def _resolve_idmap(self, document, loader):
        # type: (Dict[unicode, Union[Dict[unicode, Dict[unicode, unicode]], List[Dict[unicode, Any]]]], Loader) -> None
        # Convert fields with mapSubject into lists
        # use mapPredicate if the mapped value isn't a dict.
        for idmapField in loader.idmap:
            if (idmapField in document):
                idmapFieldValue = document[idmapField]
                if (isinstance(idmapFieldValue, dict)
                        and "$import" not in idmapFieldValue
                        and "$include" not in idmapFieldValue):
                    ls = []
                    for k in sorted(idmapFieldValue.keys()):
                        val = idmapFieldValue[k]
                        v = None  # type: Dict[unicode, Any]
                        if not isinstance(val, dict):
                            if idmapField in loader.mapPredicate:
                                v = {loader.mapPredicate[idmapField]: val}
                            else:
                                raise validate.ValidationException(
                                    "mapSubject '%s' value '%s' is not a dict"
                                    "and does not have a mapPredicate", k, v)
                        else:
                            v = val
                        v[loader.idmap[idmapField]] = k
                        ls.append(v)
                    document[idmapField] = ls

    # Type-DSL shorthand: a base type name optionally followed by "[]"
    # (array of that type) and/or "?" (nullable), e.g. "int[]?".
    # Groups: 1 = base type, 2 = "[]" marker, 3 = "?" marker.
    # (Python 2 `ur""` raw-unicode literal.)
    typeDSLregex = re.compile(ur"^([^[?]+)(\[\])?(\?)?$")

    def _type_dsl(self, t):
        # type: (Union[unicode, Dict, List]) -> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]]
        """Expand a type-DSL string: "T[]" -> array of T, "T?" -> optional T.

        Non-string inputs and strings that do not match the DSL pattern
        are returned unchanged.
        """
        if not isinstance(t, (str, unicode)):
            return t

        match = Loader.typeDSLregex.match(t)
        if match is None:
            return t

        result = match.group(1)  # the base type name
        if match.group(2):
            # "[]" suffix: wrap in an array type.
            result = {u"type": u"array", u"items": result}
        if match.group(3):
            # "?" suffix: make the type nullable.
            result = [u"null", result]
        return result

    def _resolve_type_dsl(self, document, loader):
        # type: (Dict[unicode, Union[unicode, Dict[unicode, unicode], List]], Loader) -> None
        """Expand type-DSL shorthand in each declared typeDSL field.

        Strings and lists of strings are expanded via _type_dsl; list
        results are then flattened and de-duplicated preserving order.
        """
        for field in loader.type_dsl_fields:
            if field not in document:
                continue
            value = document[field]
            if isinstance(value, (str, unicode)):
                document[field] = self._type_dsl(value)
            elif isinstance(value, list):
                document[field] = [self._type_dsl(entry) for entry in value]
            expanded = document[field]
            if isinstance(expanded, list):
                # Flatten nested lists, then drop repeats while keeping
                # first-occurrence order (entries may be unhashable dicts,
                # so a membership list is used rather than a set).
                deduped = []
                for entry in flatten(expanded):
                    if entry not in deduped:
                        deduped.append(entry)
                document[field] = deduped

    def _resolve_identifier(self, document, loader, base_url):
        # type: (Dict[unicode, unicode], Loader, unicode) -> unicode
        # Expand identifier field (usually 'id') to resolve scope
        for identifer in loader.identifiers:
            if identifer in document:
                if isinstance(document[identifer], basestring):
                    document[identifer] = loader.expand_url(
                        document[identifer], base_url, scoped_id=True)
                    if (document[identifer] not in loader.idx or isinstance(
                            loader.idx[document[identifer]], basestring)):
                        loader.idx[document[identifer]] = document
                    base_url = document[identifer]
                else:
                    raise validate.ValidationException(
                        "identifier field '%s' must be a string" %
                        (document[identifer]))
        return base_url

    def _resolve_identity(self, document, loader, base_url):
        # type: (Dict[unicode, List[unicode]], Loader, unicode) -> None
        # Resolve scope for identity fields (fields where the value is the
        # identity of a standalone node, such as enum symbols)
        for identifer in loader.identity_links:
            if identifer in document and isinstance(document[identifer], list):
                for n, v in enumerate(document[identifer]):
                    if isinstance(document[identifer][n], basestring):
                        document[identifer][n] = loader.expand_url(
                            document[identifer][n], base_url, scoped_id=True)
                        if document[identifer][n] not in loader.idx:
                            loader.idx[document[identifer]
                                       [n]] = document[identifer][n]

    def _normalize_fields(self, document, loader):
        # type: (Dict[unicode, unicode], Loader) -> None
        # Normalize fields which are prefixed or full URIn to vocabulary terms
        for d in document:
            d2 = loader.expand_url(d, u"", scoped_id=False, vocab_term=True)
            if d != d2:
                document[d2] = document[d]
                del document[d]

    def _resolve_uris(self, document, loader, base_url):
        # type: (Dict[unicode, Union[unicode, List[unicode]]], Loader, unicode) -> None
        # Resolve remaining URLs based on document base
        for d in loader.url_fields:
            if d in document:
                datum = document[d]
                if isinstance(datum, (str, unicode)):
                    document[d] = loader.expand_url(
                        datum,
                        base_url,
                        scoped_id=False,
                        vocab_term=(d in loader.vocab_fields),
                        scoped_ref=self.scoped_ref_fields.get(d))
                elif isinstance(datum, list):
                    document[d] = [
                        loader.expand_url(
                            url,
                            base_url,
                            scoped_id=False,
                            vocab_term=(d in loader.vocab_fields),
                            scoped_ref=self.scoped_ref_fields.get(d))
                        if isinstance(url, (str, unicode)) else url
                        for url in datum
                    ]

    def resolve_all(self, document, base_url, file_base=None, checklinks=True):
        # type: (DocumentType, unicode, unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]]
        """Recursively resolve every directive, identifier, and URL in
        *document*.

        Returns a (resolved document, metadata) tuple.  The ordering of
        the normalization passes below is significant; do not reorder.
        """
        loader = self
        metadata = {}  # type: Dict[unicode, Any]
        if file_base is None:
            file_base = base_url

        if isinstance(document, dict):
            # Handle $import and $include
            if (u'$import' in document or u'$include' in document):
                return self.resolve_ref(document,
                                        base_url=file_base,
                                        checklinks=checklinks)
            elif u'$mixin' in document:
                return self.resolve_ref(document,
                                        base_url=base_url,
                                        checklinks=checklinks)
        elif isinstance(document, list):
            pass
        else:
            # Scalars resolve to themselves with empty metadata.
            return (document, metadata)

        newctx = None  # type: Loader
        if isinstance(document, dict):
            # Handle $base, $profile, $namespaces, $schemas and $graph
            if u"$base" in document:
                base_url = document[u"$base"]

            if u"$profile" in document:
                # A profile layers extra namespaces and schemas on top of
                # the current context via a sub-loader.
                if not newctx:
                    newctx = SubLoader(self)
                prof = self.fetch(document[u"$profile"])
                newctx.add_namespaces(document.get(u"$namespaces", {}))
                newctx.add_schemas(document.get(u"$schemas", []),
                                   document[u"$profile"])

            if u"$namespaces" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_namespaces(document[u"$namespaces"])

            if u"$schemas" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_schemas(document[u"$schemas"], file_base)

            if newctx:
                loader = newctx

            if u"$graph" in document:
                # Everything except $graph becomes document metadata,
                # itself resolved (without link checking).
                metadata = _copy_dict_without_key(document, u"$graph")
                document = document[u"$graph"]
                resolved_metadata = loader.resolve_all(metadata,
                                                       base_url,
                                                       file_base=file_base,
                                                       checklinks=False)[0]
                if isinstance(resolved_metadata, dict):
                    metadata = resolved_metadata
                else:
                    raise validate.ValidationException(
                        "Validation error, metadata must be dict: %s" %
                        (resolved_metadata))

        if isinstance(document, dict):
            # Normalization passes; identifiers must be expanded before
            # identity links and URI fields are resolved against them.
            self._normalize_fields(document, loader)
            self._resolve_idmap(document, loader)
            self._resolve_type_dsl(document, loader)
            base_url = self._resolve_identifier(document, loader, base_url)
            self._resolve_identity(document, loader, base_url)
            self._resolve_uris(document, loader, base_url)

            try:
                for key, val in document.items():
                    document[key], _ = loader.resolve_all(val,
                                                          base_url,
                                                          file_base=file_base,
                                                          checklinks=False)
            except validate.ValidationException as v:
                _logger.warn("loader is %s", id(loader), exc_info=True)
                raise validate.ValidationException(
                    "(%s) (%s) Validation error in field %s:\n%s" %
                    (id(loader), file_base, key, validate.indent(str(v))))

        elif isinstance(document, list):
            i = 0
            try:
                while i < len(document):
                    val = document[i]
                    if isinstance(val, dict) and (u"$import" in val
                                                  or u"$mixin" in val):
                        l, _ = loader.resolve_ref(val,
                                                  base_url=file_base,
                                                  checklinks=False)
                        if isinstance(l, list):  # never true?
                            # An import that yields a list is spliced into
                            # the surrounding list in place.
                            del document[i]
                            for item in aslist(l):
                                document.insert(i, item)
                                i += 1
                        else:
                            document[i] = l
                            i += 1
                    else:
                        document[i], _ = loader.resolve_all(
                            val,
                            base_url,
                            file_base=file_base,
                            checklinks=False)
                        i += 1
            except validate.ValidationException as v:
                _logger.warn("failed", exc_info=True)
                raise validate.ValidationException(
                    "(%s) (%s) Validation error in position %i:\n%s" %
                    (id(loader), file_base, i, validate.indent(str(v))))

            # NOTE(review): this metadata identity fix-up only runs for
            # list documents — confirm that is intentional.
            for identifer in loader.identity_links:
                if identifer in metadata:
                    if isinstance(metadata[identifer], (str, unicode)):
                        metadata[identifer] = loader.expand_url(
                            metadata[identifer], base_url, scoped_id=True)
                        loader.idx[metadata[identifer]] = document

        if checklinks:
            document = self.validate_links(document, u"")

        return document, metadata

    def fetch_text(self, url):
        # type: (unicode) -> unicode
        """Return the raw text behind *url*, consulting the cache first.

        Supports http/https (through self.session) and file URLs; any
        other scheme raises ValueError.
        """
        if url in self.cache:
            return self.cache[url]

        parts = urlparse.urlsplit(url)
        scheme = parts.scheme

        if scheme in [u'http', u'https'] and self.session:
            try:
                response = self.session.get(url)
                response.raise_for_status()
            except Exception as e:
                raise RuntimeError(url, e)
            return response.text
        if scheme == 'file':
            try:
                with open(parts.path) as handle:
                    contents = handle.read()
                # Python 2 `open` yields byte strings; decode to unicode.
                if hasattr(contents, "decode"):
                    return contents.decode("utf-8")
                return contents
            except (OSError, IOError) as e:
                raise RuntimeError('Error reading %s %s' % (url, e))
        raise ValueError('Unsupported scheme in url: %s' % url)

    def fetch(self, url, inject_ids=True):  # type: (unicode, bool) -> Any
        """Fetch *url*, parse it as YAML, and index the result.

        Already-indexed URLs come straight from the index.  When the
        result is a mapping and *inject_ids* is set, missing identifier
        fields are filled with the document URL and the result is indexed
        under each expanded identifier; otherwise it is indexed by URL.
        """
        if url in self.idx:
            return self.idx[url]
        try:
            text = self.fetch_text(url)
            stream = StringIO(text.decode('utf-8') if isinstance(text, bytes)
                              else text)
            stream.name = url  # type: ignore
            result = yaml.load(stream, Loader=SafeLoader)
        except yaml.parser.ParserError as e:
            raise validate.ValidationException("Syntax error %s" % (e))
        if isinstance(result, dict) and inject_ids and self.identifiers:
            for identifier in self.identifiers:
                if identifier not in result:
                    result[identifier] = url
                self.idx[self.expand_url(result[identifier], url)] = result
        else:
            self.idx[url] = result
        return result

    def check_file(self, fn):  # type: (unicode) -> bool
        """Return True iff *fn* is a file:// URL naming an existing path."""
        if not fn.startswith("file://"):
            return False
        return os.path.exists(urlparse.urlsplit(fn).path)

    # Constrained TypeVar used by validate_link: a link value is a string,
    # a list of strings, or a mapping.
    FieldType = TypeVar('FieldType', unicode, List[unicode], Dict[unicode,
                                                                  Any])

    def validate_scoped(self, field, link, docid):
        # type: (unicode, unicode, unicode) -> unicode
        """Resolve *link* against the enclosing scopes of *docid*.

        Drops refScope[field] levels from the document fragment, then
        walks outward two path components at a time, returning the first
        candidate URL found in the index.  Raises ValidationException
        when no scope level resolves.
        """
        parts = urlparse.urlsplit(docid)
        frags = parts.fragment.split(u"/")
        # Discard the innermost refScope levels first.
        remaining = self.scoped_ref_fields[field]
        while remaining > 0 and len(frags) > 0:
            frags.pop()
            remaining -= 1
        tried = []
        while True:
            frags.append(link)
            candidate = urlparse.urlunsplit(
                (parts.scheme, parts.netloc, parts.path,
                 parts.query, u"/".join(frags)))
            tried.append(candidate)
            if candidate in self.idx:
                return candidate
            frags.pop()
            if len(frags) == 0:
                break
            frags.pop()
        raise validate.ValidationException(
            "Field `%s` contains undefined reference to `%s`, tried %s" %
            (field, link, tried))

    def validate_link(self, field, link, docid):
        # type: (unicode, FieldType, unicode) -> FieldType
        """Verify that *link* (a string, list, or dict) resolves.

        Returns the possibly-rewritten link (scoped references are
        replaced by their resolved URLs) or raises ValidationException.
        """
        if field in self.nolinkcheck:
            return link
        if isinstance(link, (str, unicode)):
            # Vocab fields may also resolve through the forward vocabulary.
            if field in self.vocab_fields:
                unresolved = (link not in self.vocab and link not in self.idx
                              and link not in self.rvocab)
            else:
                unresolved = link not in self.idx and link not in self.rvocab
            if unresolved:
                if field in self.scoped_ref_fields:
                    return self.validate_scoped(field, link, docid)
                elif not self.check_file(link):
                    raise validate.ValidationException(
                        "Field `%s` contains undefined reference to `%s`" %
                        (field, link))
        elif isinstance(link, list):
            # Validate each element, collecting every failure.
            problems = []
            for pos, item in enumerate(link):
                try:
                    link[pos] = self.validate_link(field, item, docid)
                except validate.ValidationException as v:
                    problems.append(v)
            if problems:
                raise validate.ValidationException("\n".join(
                    [str(e) for e in problems]))
        elif isinstance(link, dict):
            self.validate_links(link, docid)
        else:
            raise validate.ValidationException("Link must be a str, unicode, "
                                               "list, or a dict.")
        return link

    def getid(self, d):  # type: (Any) -> unicode
        """Return the first string identifier found in mapping *d*, else None."""
        if isinstance(d, dict):
            for field in self.identifiers:
                value = d.get(field)
                if isinstance(value, (str, unicode)):
                    return value
        return None

    def validate_links(self, document, base_url):
        # type: (DocumentType, unicode) -> DocumentType
        """Recursively check every link field in *document*.

        Returns the document (with scoped references rewritten); raises a
        ValidationException aggregating every failure found.
        """
        docid = self.getid(document) or base_url

        errors = []
        iterator = None  # type: Any
        if isinstance(document, list):
            iterator = enumerate(document)
        elif isinstance(document, dict):
            try:
                for field in self.url_fields:
                    if field in document and field not in self.identity_links:
                        document[field] = self.validate_link(
                            field, document[field], docid)
            except validate.ValidationException as v:
                errors.append(v)
            # Python 2 dicts expose iteritems(); fall back to items().
            iterator = (document.iteritems()
                        if hasattr(document, "iteritems")
                        else document.items())
        else:
            return document

        for key, val in iterator:
            try:
                document[key] = self.validate_links(val, docid)
            except validate.ValidationException as v:
                if key in self.nolinkcheck:
                    continue
                docid2 = self.getid(val)
                if docid2:
                    errors.append(
                        validate.ValidationException(
                            "While checking object `%s`\n%s" %
                            (docid2, validate.indent(str(v)))))
                elif isinstance(key, basestring):
                    errors.append(
                        validate.ValidationException(
                            "While checking field `%s`\n%s" %
                            (key, validate.indent(str(v)))))
                else:
                    errors.append(
                        validate.ValidationException(
                            "While checking position %s\n%s" %
                            (key, validate.indent(str(v)))))

        if errors:
            if len(errors) > 1:
                raise validate.ValidationException("\n".join(
                    [str(e) for e in errors]))
            raise errors[0]
        return document
コード例 #12
0
def use_wrapper():
    """Build and return a cache-aware HTTP session via CacheControl."""
    print('Using helper')
    return CacheControl(Session())
コード例 #13
0
class Loader(object):
    """Resolves schema-salad documents: expands URLs and identifiers,
    applies directives ($import, $include, $mixin, $graph), and validates
    links against an index of known documents."""

    # A JSON-LD-style context: field name -> "@id" string or config dict.
    ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]]
    # A resolvable document is either a list or a string-keyed mapping.
    DocumentType = TypeVar('DocumentType', List, Dict[unicode, Any])

    def __init__(self, ctx, schemagraph=None, foreign_properties=None,
                 idx=None, cache=None, session=None):
        # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any], requests.sessions.Session) -> None
        """Create a Loader for the given context *ctx*.

        All collaborators (schema graph, document index, text cache, HTTP
        session) may be injected; missing ones get fresh defaults.
        """
        # Index keys are normalized URLs so lookups are insensitive to
        # trivial spelling differences.
        normalize = lambda url: urlparse.urlsplit(url).geturl()
        if idx is not None:
            self.idx = idx
        else:
            self.idx = NormDict(normalize)

        self.ctx = {}  # type: Loader.ContextType
        if schemagraph is not None:
            self.graph = schemagraph
        else:
            self.graph = rdflib.graph.Graph()

        if foreign_properties is not None:
            self.foreign_properties = foreign_properties
        else:
            self.foreign_properties = set()

        if cache is not None:
            self.cache = cache
        else:
            self.cache = {}

        self.session = None  # type: requests.sessions.Session
        if session is not None:
            self.session = session
        else:
            # BUG FIX: the original read os.environ["HOME"] unconditionally,
            # raising KeyError on systems without HOME set.  Fall back
            # through TMPDIR to /tmp instead.
            if "HOME" in os.environ:
                cache_dir = os.path.join(os.environ["HOME"], ".cache", "salad")
            elif "TMPDIR" in os.environ:
                cache_dir = os.path.join(os.environ["TMPDIR"], ".cache", "salad")
            else:
                cache_dir = os.path.join("/tmp", ".cache", "salad")
            self.session = CacheControl(requests.Session(),
                                        cache=FileCache(cache_dir))

        # The field-classification sets below are populated by add_context().
        self.url_fields = None  # type: Set[unicode]
        self.scoped_ref_fields = None  # type: Dict[unicode, int]
        self.vocab_fields = None  # type: Set[unicode]
        self.identifiers = None  # type: Set[unicode]
        self.identity_links = None  # type: Set[unicode]
        self.standalone = None  # type: Set[unicode]
        self.nolinkcheck = None  # type: Set[unicode]
        self.vocab = {}  # type: Dict[unicode, unicode]
        self.rvocab = {}  # type: Dict[unicode, unicode]
        self.idmap = None  # type: Dict[unicode, Any]
        self.mapPredicate = None  # type: Dict[unicode, unicode]
        self.type_dsl_fields = None  # type: Set[unicode]

        self.add_context(ctx)

    def expand_url(self, url, base_url, scoped_id=False, vocab_term=False, scoped_ref=None):
        # type: (unicode, unicode, bool, bool, int) -> unicode
        """Expand *url* against *base_url*.

        Honors vocabulary terms, namespace prefixes, scoped identifiers
        (fragment paths extended from the base document), and scoped
        references (left for validate_scoped to resolve later).
        """
        if url in (u"@id", u"@type"):
            return url

        if vocab_term and url in self.vocab:
            return url

        # Substitute a known namespace prefix, e.g. "prefix:Name".
        if self.vocab and u":" in url:
            prefix = url.split(u":")[0]
            if prefix in self.vocab:
                url = self.vocab[prefix] + url[len(prefix) + 1:]

        split = urlparse.urlsplit(url)

        if split.scheme or url.startswith(u"$(") or url.startswith(u"${"):
            # Absolute URL or parameter reference: leave untouched.
            pass
        elif scoped_id and not split.fragment:
            # Scoped identifier: extend the base document's fragment path.
            base_split = urlparse.urlsplit(base_url)
            if base_split.fragment:
                frag = base_split.fragment + u"/" + split.path
            else:
                frag = split.path
            base_path = base_split.path if base_split.path else "/"
            url = urlparse.urlunsplit(
                (base_split.scheme, base_split.netloc, base_path,
                 base_split.query, frag))
        elif scoped_ref is not None and not split.fragment:
            # Scoped references are resolved later by validate_scoped.
            pass
        else:
            url = urlparse.urljoin(base_url, url)

        if vocab_term and url in self.rvocab:
            return self.rvocab[url]
        return url

    def _add_properties(self, s):  # type: (unicode) -> None
        """Register RDF property *s* as a foreign property.

        It is also added to url_fields unless its declared range is a
        literal (XML Schema type other than anyURI, or rdfs:Literal).
        """
        for _, _, rng in self.graph.triples((s, RDFS.range, None)):
            rng_str = unicode(rng)
            is_literal = (
                (rng_str.startswith(u"http://www.w3.org/2001/XMLSchema#")
                 and not rng_str == u"http://www.w3.org/2001/XMLSchema#anyURI")
                or rng_str == u"http://www.w3.org/2000/01/rdf-schema#Literal")
            if not is_literal:
                self.url_fields.add(unicode(s))
        self.foreign_properties.add(unicode(s))

    def add_namespaces(self, ns):  # type: (Dict[unicode, unicode]) -> None
        """Merge the prefix -> URI mappings in *ns* into the vocabulary."""
        for prefix, uri in ns.items():
            self.vocab[prefix] = uri

    def add_schemas(self, ns, base_url):
        # type: (Union[List[unicode], unicode], unicode) -> None
        """Fetch and parse the external RDF schema(s) *ns*.

        Parsed graphs are cached per URL and merged into self.graph;
        property declarations are then harvested into the loader's field
        sets and every subject is registered in the index.
        """
        for sch in aslist(ns):
            fetchurl = urlparse.urljoin(base_url, sch)
            if fetchurl in self.cache:
                continue
            _logger.info("Getting external schema %s", fetchurl)
            content = self.fetch_text(fetchurl)
            self.cache[fetchurl] = rdflib.graph.Graph()
            # Try common RDF serializations until one parses.
            for fmt in ['xml', 'turtle', 'rdfa']:
                try:
                    self.cache[fetchurl].parse(data=content, format=fmt)
                    self.graph += self.cache[fetchurl]
                    break
                except (xml.sax.SAXParseException,  # type: ignore
                        TypeError, BadSyntax):
                    pass

        for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
            self._add_properties(s)
        for s, _, o in self.graph.triples((None, RDFS.subPropertyOf, None)):
            self._add_properties(s)
            self._add_properties(o)
        for s, _, _ in self.graph.triples((None, RDFS.range, None)):
            self._add_properties(s)
        for s, _, _ in self.graph.triples((None, RDF.type, OWL.ObjectProperty)):
            self._add_properties(s)

        for s, _, _ in self.graph.triples((None, None, None)):
            self.idx[unicode(s)] = None

    def add_context(self, newcontext, baseuri=""):
        # type: (Loader.ContextType, unicode) -> None
        """Install *newcontext* as the loader's active context.

        Populates the field-classification sets (identifiers, URL fields,
        vocab fields, scoped refs, idmap, ...) and builds the forward and
        reverse vocabularies.  May only be called on a loader whose
        vocabulary is still empty.
        """
        if self.vocab:
            raise validate.ValidationException(
                "Refreshing context that already has stuff in it")

        self.url_fields = set()
        self.scoped_ref_fields = {}
        self.vocab_fields = set()
        self.identifiers = set()
        self.identity_links = set()
        self.standalone = set()
        self.nolinkcheck = set()
        self.idmap = {}
        self.mapPredicate = {}
        self.vocab = {}
        self.rvocab = {}
        self.type_dsl_fields = set()

        self.ctx.update(_copy_dict_without_key(newcontext, u"@context"))

        _logger.debug("ctx is %s", self.ctx)

        for key, value in self.ctx.items():
            # Classify each field: plain "@id" marks an identifier; dict
            # values carry finer-grained flags (note the elif chain — a
            # field takes exactly one of the @type branches).
            if value == u"@id":
                self.identifiers.add(key)
                self.identity_links.add(key)
            elif isinstance(value, dict) and value.get(u"@type") == u"@id":
                self.url_fields.add(key)
                if u"refScope" in value:
                    self.scoped_ref_fields[key] = value[u"refScope"]
                if value.get(u"identity", False):
                    self.identity_links.add(key)
            elif isinstance(value, dict) and value.get(u"@type") == u"@vocab":
                self.url_fields.add(key)
                self.vocab_fields.add(key)
                if u"refScope" in value:
                    self.scoped_ref_fields[key] = value[u"refScope"]
                if value.get(u"typeDSL"):
                    self.type_dsl_fields.add(key)
            # The flags below are independent of the @type branch above.
            if isinstance(value, dict) and value.get(u"noLinkCheck"):
                self.nolinkcheck.add(key)

            if isinstance(value, dict) and value.get(u"mapSubject"):
                self.idmap[key] = value[u"mapSubject"]

            if isinstance(value, dict) and value.get(u"mapPredicate"):
                self.mapPredicate[key] = value[u"mapPredicate"]

            if isinstance(value, dict) and u"@id" in value:
                self.vocab[key] = value[u"@id"]
            elif isinstance(value, basestring):
                self.vocab[key] = value

        # Reverse vocabulary: expanded URL -> vocabulary term.
        for k, v in self.vocab.items():
            self.rvocab[self.expand_url(v, u"", scoped_id=False)] = k

        _logger.debug("identifiers is %s", self.identifiers)
        _logger.debug("identity_links is %s", self.identity_links)
        _logger.debug("url_fields is %s", self.url_fields)
        _logger.debug("vocab_fields is %s", self.vocab_fields)
        _logger.debug("vocab is %s", self.vocab)

    def resolve_ref(self, ref, base_url=None, checklinks=True):
        # type: (Union[Dict[unicode, Any], unicode], unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]]
        """Resolve a reference to a document.

        *ref* is either a URL string or a dict carrying exactly one of
        the directives $import / $include / $mixin, or an identifier
        field.  Returns a (resolved document, metadata) tuple.
        """
        base_url = base_url or u'file://%s/' % os.path.abspath('.')

        obj = None  # type: Dict[unicode, Any]
        inc = False  # True when resolving a raw-text $include
        mixin = None  # holds the overlay dict when resolving a $mixin

        # If `ref` is a dict, look for special directives.
        if isinstance(ref, dict):
            obj = ref
            if u"$import" in obj:
                if len(obj) == 1:
                    ref = obj[u"$import"]
                    obj = None
                else:
                    raise ValueError(
                        u"'$import' must be the only field in %s" % (str(obj)))
            elif u"$include" in obj:
                if len(obj) == 1:
                    ref = obj[u"$include"]
                    inc = True
                    obj = None
                else:
                    raise ValueError(
                        u"'$include' must be the only field in %s" % (str(obj)))
            elif u"$mixin" in obj:
                ref = obj[u"$mixin"]
                mixin = obj
                obj = None
            else:
                # No directive: fall back to the object's identifier field.
                ref = None
                for identifier in self.identifiers:
                    if identifier in obj:
                        ref = obj[identifier]
                        break
                if not ref:
                    raise ValueError(
                        u"Object `%s` does not have identifier field in %s" % (obj, self.identifiers))

        if not isinstance(ref, (str, unicode)):
            raise ValueError(u"Must be string: `%s`" % str(ref))

        url = self.expand_url(ref, base_url, scoped_id=(obj is not None))

        # Has this reference been loaded already?
        if url in self.idx and (not mixin):
            return self.idx[url], {}

        # "$include" directive means load raw text
        if inc:
            return self.fetch_text(url), {}

        doc = None
        if obj:
            # Inline object: stamp the expanded URL into its identifiers.
            for identifier in self.identifiers:
                obj[identifier] = url
            doc_url = url
        else:
            # Load structured document
            doc_url, frg = urlparse.urldefrag(url)
            if doc_url in self.idx and (not mixin):
                # If the base document is in the index, it was already loaded,
                # so if we didn't find the reference earlier then it must not
                # exist.
                raise validate.ValidationException(
                    u"Reference `#%s` not found in file `%s`." % (frg, doc_url))
            doc = self.fetch(doc_url, inject_ids=(not mixin))

        # Recursively expand urls and resolve directives
        if mixin:
            # $mixin: overlay the mixin's fields onto a copy of the target.
            doc = copy.deepcopy(doc)
            doc.update(mixin)
            del doc["$mixin"]
            url = None
            resolved_obj, metadata = self.resolve_all(
                doc, base_url, file_base=doc_url, checklinks=checklinks)
        else:
            resolved_obj, metadata = self.resolve_all(
                doc if doc else obj, doc_url, checklinks=checklinks)

        # Requested reference should be in the index now, otherwise it's a bad
        # reference
        if url is not None:
            if url in self.idx:
                resolved_obj = self.idx[url]
            else:
                raise RuntimeError("Reference `%s` is not in the index. "
                    "Index contains:\n  %s" % (url, "\n  ".join(self.idx)))

        if isinstance(resolved_obj, (dict)):
            if u"$graph" in resolved_obj:
                # Split the $graph payload from its surrounding metadata.
                metadata = _copy_dict_without_key(resolved_obj, u"$graph")
                return resolved_obj[u"$graph"], metadata
            else:
                return resolved_obj, metadata
        else:
            return resolved_obj, metadata


    def _resolve_idmap(self, document, loader):
        # type: (Dict[unicode, Union[Dict[unicode, Dict[unicode, unicode]], List[Dict[unicode, Any]]]], Loader) -> None
        # Convert fields with mapSubject into lists
        # use mapPredicate if the mapped value isn't a dict.
        for idmapField in loader.idmap:
            if (idmapField in document):
                idmapFieldValue = document[idmapField]
                if (isinstance(idmapFieldValue, dict)
                        and "$import" not in idmapFieldValue
                        and "$include" not in idmapFieldValue):
                    ls = []
                    for k in sorted(idmapFieldValue.keys()):
                        val = idmapFieldValue[k]
                        v = None  # type: Dict[unicode, Any]
                        if not isinstance(val, dict):
                            if idmapField in loader.mapPredicate:
                                v = {loader.mapPredicate[idmapField]: val}
                            else:
                                raise validate.ValidationException(
                                    "mapSubject '%s' value '%s' is not a dict"
                                    "and does not have a mapPredicate", k, v)
                        else:
                            v = val
                        v[loader.idmap[idmapField]] = k
                        ls.append(v)
                    document[idmapField] = ls

    # Parses the type DSL shorthand: group 1 is the base type name,
    # group 2 the optional "[]" (array) suffix, group 3 the optional
    # "?" (nullable) suffix.  (ur"" literal: this module is Python 2.)
    typeDSLregex = re.compile(ur"^([^[?]+)(\[\])?(\?)?$")

    def _type_dsl(self, t):
        # type: (Union[unicode, Dict, List]) -> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]]
        """Expand type DSL shorthand on a single type expression.

        "T[]" becomes an array schema over T and "T?" becomes the union
        [null, T]; the suffixes combine ("T[]?").  Non-strings and
        strings that don't match the DSL pattern are returned unchanged.
        """
        if not isinstance(t, (str, unicode)):
            return t

        match = Loader.typeDSLregex.match(t)
        if match is None:
            return t

        expanded = match.group(1)  # bare type name
        if match.group(2):
            # "[]" suffix: wrap in an array schema.
            expanded = {u"type": u"array",
                        u"items": expanded}
        if match.group(3):
            # "?" suffix: make the (possibly array) type nullable.
            expanded = [u"null", expanded]
        return expanded

    def _resolve_type_dsl(self, document, loader):
        # type: (Dict[unicode, Union[unicode, Dict[unicode, unicode], List]], Loader) -> None
        """Expand type DSL shorthand in-place for every declared DSL field.

        After expansion, list-valued fields are flattened and
        de-duplicated while preserving first-occurrence order.
        """
        for field in loader.type_dsl_fields:
            if field not in document:
                continue
            value = document[field]
            if isinstance(value, (str, unicode)):
                document[field] = self._type_dsl(value)
            elif isinstance(value, list):
                document[field] = [self._type_dsl(entry) for entry in value]
            expanded = document[field]
            if isinstance(expanded, list):
                document[field] = flatten(expanded)
                # Drop duplicates, keeping the first occurrence of each
                # entry (entries may be unhashable dicts, hence lists).
                seen = []  # type: List[unicode]
                deduped = []
                for entry in document[field]:
                    if entry not in seen:
                        deduped.append(entry)
                        seen.append(entry)
                document[field] = deduped

    def _resolve_identifier(self, document, loader, base_url):
        # type: (Dict[unicode, unicode], Loader, unicode) -> unicode
        """Expand identifier fields (usually 'id') against the scope.

        Registers the document in loader.idx under its expanded
        identifier and returns the updated base URL for child nodes.
        Raises validate.ValidationException for a non-string identifier.
        """
        for id_field in loader.identifiers:
            if id_field not in document:
                continue
            if not isinstance(document[id_field], basestring):
                raise validate.ValidationException(
                    "identifier field '%s' must be a string"
                    % (document[id_field]))
            document[id_field] = loader.expand_url(
                document[id_field], base_url, scoped_id=True)
            # Index this document unless a richer entry already exists
            # (a plain string placeholder may be overwritten).
            if (document[id_field] not in loader.idx
                    or isinstance(loader.idx[document[id_field]], basestring)):
                loader.idx[document[id_field]] = document
            # Children resolve relative to this node's identifier.
            base_url = document[id_field]
        return base_url

    def _resolve_identity(self, document, loader, base_url):
        # type: (Dict[unicode, List[unicode]], Loader, unicode) -> None
        """Expand identity-field list entries to scoped URLs in place.

        Identity fields hold the identity of standalone nodes (e.g. enum
        symbols); each expanded entry is indexed as mapping to itself.
        """
        for id_field in loader.identity_links:
            if id_field not in document:
                continue
            entries = document[id_field]
            if not isinstance(entries, list):
                continue
            for pos, _ in enumerate(entries):
                if isinstance(entries[pos], basestring):
                    # `entries` aliases the document's list, so this
                    # updates the document in place.
                    entries[pos] = loader.expand_url(
                        entries[pos], base_url, scoped_id=True)
                    if entries[pos] not in loader.idx:
                        loader.idx[entries[pos]] = entries[pos]

    def _normalize_fields(self, document, loader):
        # type: (Dict[unicode, unicode], Loader) -> None
        # Normalize fields which are prefixed or full URIn to vocabulary terms
        for d in document:
            d2 = loader.expand_url(d, u"", scoped_id=False, vocab_term=True)
            if d != d2:
                document[d2] = document[d]
                del document[d]

    def _resolve_uris(self, document, loader, base_url):
        # type: (Dict[unicode, Union[unicode, List[unicode]]], Loader, unicode) -> None
        """Expand remaining URI-valued fields against the document base.

        String values are expanded directly; list values element-wise
        (non-string list entries are passed through unchanged).
        """
        for d in loader.url_fields:
            if d in document:
                datum = document[d]
                if isinstance(datum, (str, unicode)):
                    document[d] = loader.expand_url(
                        datum, base_url, scoped_id=False,
                        vocab_term=(d in loader.vocab_fields),
                        # Consistency fix: consult the active loader's
                        # scoped-ref table (was self.scoped_ref_fields),
                        # matching every other lookup in this method and
                        # the sibling _resolve_* helpers.
                        scoped_ref=loader.scoped_ref_fields.get(d))
                elif isinstance(datum, list):
                    document[d] = [
                        loader.expand_url(
                            url, base_url, scoped_id=False,
                            vocab_term=(d in loader.vocab_fields),
                            scoped_ref=loader.scoped_ref_fields.get(d))
                        if isinstance(url, (str, unicode))
                        else url for url in datum]


    def resolve_all(self, document, base_url, file_base=None, checklinks=True):
        # type: (DocumentType, unicode, unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]]
        """Recursively resolve all directives and references in document.

        Handles $import/$include/$mixin, the context directives $base,
        $profile, $namespaces, $schemas and $graph, then expands
        identifier and URI fields and recurses into nested values.

        Returns a (resolved document, metadata) tuple; metadata holds
        the $graph wrapper's sibling fields when present, else is empty.
        Raises validate.ValidationException when resolution fails.
        """
        loader = self
        metadata = {}  # type: Dict[unicode, Any]
        if file_base is None:
            file_base = base_url

        if isinstance(document, dict):
            # Handle $import and $include
            if (u'$import' in document or u'$include' in document):
                return self.resolve_ref(document, base_url=file_base, checklinks=checklinks)
            elif u'$mixin' in document:
                return self.resolve_ref(document, base_url=base_url, checklinks=checklinks)
        elif isinstance(document, list):
            pass
        else:
            # Scalars need no resolution.
            return (document, metadata)

        newctx = None  # type: Loader
        if isinstance(document, dict):
            # Handle $base, $profile, $namespaces, $schemas and $graph
            if u"$base" in document:
                base_url = document[u"$base"]

            if u"$profile" in document:
                # A profile introduces a sub-loader carrying its own
                # namespaces and schemas.  NOTE(review): `prof` is
                # fetched but otherwise unused here — confirm intent.
                if not newctx:
                    newctx = SubLoader(self)
                prof = self.fetch(document[u"$profile"])
                newctx.add_namespaces(document.get(u"$namespaces", {}))
                newctx.add_schemas(document.get(
                    u"$schemas", []), document[u"$profile"])

            if u"$namespaces" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_namespaces(document[u"$namespaces"])

            if u"$schemas" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_schemas(document[u"$schemas"], file_base)

            if newctx:
                loader = newctx

            if u"$graph" in document:
                # Split the metadata (everything except $graph) from the
                # payload, and resolve the metadata separately with link
                # checking deferred.
                metadata = _copy_dict_without_key(document, u"$graph")
                document = document[u"$graph"]
                resolved_metadata = loader.resolve_all(metadata, base_url,
                        file_base=file_base, checklinks=False)[0]
                if isinstance(resolved_metadata, dict):
                    metadata = resolved_metadata
                else:
                    raise validate.ValidationException(
                        "Validation error, metadata must be dict: %s"
                        % (resolved_metadata))

        if isinstance(document, dict):
            # Normalize keys, unroll idmaps, expand the type DSL, then
            # resolve identifier / identity / URI fields in scope order.
            self._normalize_fields(document, loader)
            self._resolve_idmap(document, loader)
            self._resolve_type_dsl(document, loader)
            base_url = self._resolve_identifier(document, loader, base_url)
            self._resolve_identity(document, loader, base_url)
            self._resolve_uris(document, loader, base_url)

            try:
                # Recurse into each value; link checking happens once at
                # the top level (see the checklinks pass below).
                for key, val in document.items():
                    document[key], _ = loader.resolve_all(
                        val, base_url, file_base=file_base, checklinks=False)
            except validate.ValidationException as v:
                _logger.warn("loader is %s", id(loader), exc_info=True)
                raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % (
                    id(loader), file_base, key, validate.indent(str(v))))

        elif isinstance(document, list):
            i = 0
            try:
                while i < len(document):
                    val = document[i]
                    if isinstance(val, dict) and (u"$import" in val or u"$mixin" in val):
                        # An import may expand into several items, which
                        # are spliced in place of the directive entry.
                        l, _ = loader.resolve_ref(val, base_url=file_base, checklinks=False)
                        if isinstance(l, list):  # never true?
                            del document[i]
                            for item in aslist(l):
                                document.insert(i, item)
                                i += 1
                        else:
                            document[i] = l
                            i += 1
                    else:
                        document[i], _ = loader.resolve_all(
                            val, base_url, file_base=file_base, checklinks=False)
                        i += 1
            except validate.ValidationException as v:
                _logger.warn("failed", exc_info=True)
                raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % (
                    id(loader), file_base, i, validate.indent(str(v))))

            # Expand any identity links carried on the $graph metadata
            # and index the resolved list under them.
            for identifer in loader.identity_links:
                if identifer in metadata:
                    if isinstance(metadata[identifer], (str, unicode)):
                        metadata[identifer] = loader.expand_url(
                            metadata[identifer], base_url, scoped_id=True)
                        loader.idx[metadata[identifer]] = document

        if checklinks:
            # Single link-validation pass over the fully resolved tree.
            document = self.validate_links(document, u"")

        return document, metadata

    def fetch_text(self, url):
        # type: (unicode) -> unicode
        """Return the raw text behind url.

        Serves cached entries first; otherwise supports http/https (via
        self.session) and file URLs.  Raises RuntimeError on fetch/read
        failure and ValueError for an unsupported scheme.
        """
        if url in self.cache:
            return self.cache[url]

        parts = urlparse.urlsplit(url)

        if parts.scheme in [u'http', u'https'] and self.session:
            try:
                resp = self.session.get(url)
                resp.raise_for_status()
            except Exception as e:
                raise RuntimeError(url, e)
            return resp.text

        if parts.scheme == 'file':
            try:
                with open(parts.path) as fp:
                    content = fp.read()
            except (OSError, IOError) as e:
                raise RuntimeError('Error reading %s %s' % (url, e))
            # Python 2 open() yields bytes; normalize to unicode text.
            if hasattr(content, "decode"):
                return content.decode("utf-8")
            return content

        raise ValueError('Unsupported scheme in url: %s' % url)

    def fetch(self, url, inject_ids=True):  # type: (unicode, bool) -> Any
        """Fetch and YAML-parse url, caching the result in self.idx.

        When inject_ids is true and the result is a dict, missing
        identifier fields are filled in with the document URL and the
        result is indexed under each expanded identifier.  Raises
        validate.ValidationException on YAML syntax errors.
        """
        if url in self.idx:
            return self.idx[url]
        try:
            text = self.fetch_text(url)
            # Wrap in a named StringIO so yaml error messages carry the URL.
            if isinstance(text, bytes):
                stream = StringIO(text.decode('utf-8'))
            else:
                stream = StringIO(text)
            stream.name = url  # type: ignore
            result = yaml.load(stream, Loader=SafeLoader)
        except yaml.parser.ParserError as e:
            raise validate.ValidationException("Syntax error %s" % (e))
        if isinstance(result, dict) and inject_ids and self.identifiers:
            for identifier in self.identifiers:
                result.setdefault(identifier, url)
                self.idx[self.expand_url(result[identifier], url)] = result
        else:
            self.idx[url] = result
        return result

    def check_file(self, fn):  # type: (unicode) -> bool
        """Return True iff fn is a file:// URL naming an existing path."""
        if not fn.startswith("file://"):
            return False
        return os.path.exists(urlparse.urlsplit(fn).path)

    # Type variable constrained to the value shapes validate_link
    # accepts and returns: a string, a list of strings, or a dict.
    FieldType = TypeVar('FieldType', unicode, List[unicode], Dict[unicode, Any])

    def validate_scoped(self, field, link, docid):
        # type: (unicode, unicode, unicode) -> unicode
        """Resolve link against the enclosing scopes of docid.

        Strips scoped_ref_fields[field] fragment components from docid's
        fragment, then walks outward one scope level at a time and
        returns the first candidate URL found in self.idx.  Raises
        validate.ValidationException listing every URL tried.
        """
        split = urlparse.urlsplit(docid)
        sp = split.fragment.split(u"/")
        n = self.scoped_ref_fields[field]
        # Discard the innermost n fragment components before searching.
        while n > 0 and len(sp) > 0:
            sp.pop()
            n -= 1
        tried = []
        while True:
            # Candidate: link appended at the current scope level.
            sp.append(link)
            url = urlparse.urlunsplit((
                split.scheme, split.netloc, split.path, split.query,
                u"/".join(sp)))
            tried.append(url)
            if url in self.idx:
                return url
            # Remove link again, then step out one more scope level.
            sp.pop()
            if len(sp) == 0:
                break
            sp.pop()
        raise validate.ValidationException(
            "Field `%s` contains undefined reference to `%s`, tried %s" % (field, link, tried))

    def validate_link(self, field, link, docid):
        # type: (unicode, FieldType, unicode) -> FieldType
        """Check that link (string, list or dict) resolves; return it.

        Strings must be known to the index or vocabulary, or resolvable
        as a scoped reference or an existing local file; lists are
        checked element-wise and dicts recursively.  Fields listed in
        nolinkcheck are skipped.  Raises validate.ValidationException
        on any undefined reference.
        """
        if field in self.nolinkcheck:
            return link
        if isinstance(link, (str, unicode)):
            # Vocabulary fields may additionally be satisfied by a
            # declared vocab term.
            if field in self.vocab_fields:
                known = (link in self.vocab or link in self.idx
                         or link in self.rvocab)
            else:
                known = link in self.idx or link in self.rvocab
            if not known:
                if field in self.scoped_ref_fields:
                    return self.validate_scoped(field, link, docid)
                elif not self.check_file(link):
                    raise validate.ValidationException(
                        "Field `%s` contains undefined reference to `%s`" % (field, link))
        elif isinstance(link, list):
            # Validate every entry, collecting failures into one report.
            errors = []
            for pos, entry in enumerate(link):
                try:
                    link[pos] = self.validate_link(field, entry, docid)
                except validate.ValidationException as v:
                    errors.append(v)
            if errors:
                raise validate.ValidationException(
                    "\n".join([str(e) for e in errors]))
        elif isinstance(link, dict):
            self.validate_links(link, docid)
        else:
            raise validate.ValidationException("Link must be a str, unicode, "
                                               "list, or a dict.")
        return link

    def getid(self, d):  # type: (Any) -> unicode
        """Return the first string-valued identifier field of d, else None."""
        if not isinstance(d, dict):
            return None
        for key in self.identifiers:
            value = d.get(key)
            if isinstance(value, (str, unicode)):
                return value
        return None

    def validate_links(self, document, base_url):
        # type: (DocumentType, unicode) -> DocumentType
        """Recursively validate every link field under document.

        The document's own identifier (when present) becomes the scope
        for resolving its links; errors from the whole subtree are
        collected and raised as one validate.ValidationException.
        Returns the (possibly updated) document.
        """
        docid = self.getid(document)
        if not docid:
            docid = base_url

        errors = []
        iterator = None  # type: Any
        if isinstance(document, list):
            iterator = enumerate(document)
        elif isinstance(document, dict):
            try:
                # Identity links define identities rather than reference
                # them, so they are excluded from link checking.
                for d in self.url_fields:
                    if d in document and d not in self.identity_links:
                        document[d] = self.validate_link(d, document[d], docid)
            except validate.ValidationException as v:
                errors.append(v)
            # Python 2 dicts have iteritems(); fall back to items().
            if hasattr(document, "iteritems"):
                iterator = document.iteritems()
            else:
                iterator = document.items()
        else:
            # Scalars carry no links.
            return document

        for key, val in iterator:
            try:
                document[key] = self.validate_links(val, docid)
            except validate.ValidationException as v:
                if key not in self.nolinkcheck:
                    # Prefer reporting the child's own id; otherwise
                    # fall back to the field name or list position.
                    docid2 = self.getid(val)
                    if docid2:
                        errors.append(validate.ValidationException(
                            "While checking object `%s`\n%s" % (docid2, validate.indent(str(v)))))
                    else:
                        if isinstance(key, basestring):
                            errors.append(validate.ValidationException(
                                "While checking field `%s`\n%s" % (key, validate.indent(str(v)))))
                        else:
                            errors.append(validate.ValidationException(
                                "While checking position %s\n%s" % (key, validate.indent(str(v)))))

        if errors:
            if len(errors) > 1:
                raise validate.ValidationException(
                    "\n".join([str(e) for e in errors]))
            else:
                raise errors[0]
        return document