Example #1
def _populate_uri_to_validator(
    validator: AValidator,
    root_base_uri,
    uri_to_validator: Dict,
):
    """
    This needs to be run after _set_to_canonical_uri because _set_to_canonical_uri propagates
    the base_URIs through the subschemas.
    returns a dictionary of the uri to validator mapping
    the uris are of three types.
    - canonical id
    - canonical id + location
    - canonical id + anchor
    """
    if (isinstance(validator, (Keyword, AValidator))
            and not isinstance(validator, KeywordGroup)
            and validator.location):
        uri_to_validator[urijoin(root_base_uri,
                                 "#" + validator.location)] = validator

    if validator.id is not None and isinstance(validator, Validator):
        validator_id = validator.id.rstrip("/")
        uri_to_validator[validator_id] = validator

    if validator.anchor:
        uri_to_validator[(urijoin(validator.base_uri or "",
                                  validator.anchor))] = validator

    for sub_validator in validator.sub_validators():
        _populate_uri_to_validator(
            validator=sub_validator,
            root_base_uri=root_base_uri,
            uri_to_validator=uri_to_validator,
        )
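A quick note on how the "canonical id + location" keys above get built: resolving a fragment-only reference against a base URI keeps the base document and attaches the fragment. A minimal sketch (the base URI and location are hypothetical):

from uritools import urijoin

base = "https://example.com/schema.json"   # hypothetical root base URI
location = "/properties/foo"               # hypothetical validator location
# urijoin keeps the base document and attaches the fragment:
assert urijoin(base, "#" + location) == "https://example.com/schema.json#/properties/foo"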
Example #2
 def _make_podcast_uri(self, xml, ep_guid=None):
     if not xml:
         return None
     # Feeds use main URL, not localized
     baseurl = ivooxapi.get_baseurl()
     program = uritools.urijoin(baseurl, xml)
     episode = '#' + uritools.urijoin(baseurl, ep_guid) if ep_guid else ''
     return 'podcast+{}{}'.format(program, episode)
Example #3
 def check(self, base, ref, expected, strict=False):
     self.assertEqual(expected, urijoin(base, ref, strict))
     # base as bytes, ref as str
     self.assertEqual(expected, urijoin(base.encode(), ref, strict))
     # base as str, ref as bytes
     self.assertEqual(expected, urijoin(base, ref.encode(), strict))
     # both base and ref as bytes
     self.assertEqual(expected.encode(),
                      urijoin(base.encode(), ref.encode(), strict))
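For context, the values a harness like this typically checks come straight from the reference-resolution examples in RFC 3986 §5.4.1. A runnable sketch of the str/str case:

from uritools import urijoin

base = "http://a/b/c/d;p?q"
assert urijoin(base, "g") == "http://a/b/c/g"          # sibling reference
assert urijoin(base, "../g") == "http://a/b/g"         # parent directory
assert urijoin(base, "#s") == "http://a/b/c/d;p?q#s"   # fragment-only reference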
Example #4
def wrap_response(url, data, method):

    title = ("title" in data and data["title"]) or ""
    image = ("image" in data and data["image"]) or ""
    favicon = ("favicon" in data and data["favicon"]) or ""
    description = ("description" in data and data["description"]) or ""
    url = ("url" in data and data["url"]) or url
    css = ("css" in data and data["css"]) or ""

    if image:
        image = urijoin(url, image)

    if favicon:
        favicon = urijoin(url, favicon)

    domain = urisplit(url).authority
    if "domain" in data and data["domain"]:
        domain = data["domain"]
    site = data["title"]
    if "site_name" in data and data["site_name"]:
        site = data["site_name"]
    if "provider_name" in data and data["provider_name"]:
        site = data["provider_name"]

    if method == "oembed" or (method == "custom" and "html" in data and data["html"]):
        html = data["html"]
    else:
        if image:
            html = template("unfurl-image", url, image, title, description, domain)
        elif favicon:
            html = template(
                "unfurl-image unfurl-favicon", url, favicon, title, description, domain
            )
        else:
            html = template(
                "unfurl-image unfurl-default",
                url,
                "https://i.imgur.com/wQ37ilJ.png",
                title,
                description,
                domain,
            )

    return {
        "method": method,
        "site": site,
        "domain": domain,
        "title": title,
        "description": description,
        "image": image,
        "favicon": favicon,
        "url": url,
        "type": "rich",
        "html": html,
        "css": css,
    }
Example #5
 def check(self, base, ref, expected, strict=False):
     self.assertEqual(expected, urijoin(base, ref, strict))
     # base as bytes, ref as str
     self.assertEqual(expected, urijoin(base.encode(), ref, strict))
     # base as str, ref as bytes
     self.assertEqual(expected, urijoin(base, ref.encode(), strict))
     # both base and ref as bytes
     self.assertEqual(
         expected.encode(),
         urijoin(base.encode(), ref.encode(), strict)
     )
Example #6
 def url_fix(self, url):
     if url == "": # Null case
         rv = url
     elif url.startswith('data:'): # data uri, not actually a link, leave it alone
         # e.g. data:text/html,<script>alert('hi');</script>
         rv = url
     elif urisplit(url)[0]: # absolute / has scheme
          # e.g. http://example.com/path
         rv = self.proxy +'/' + url
     elif urisplit(url)[1]: # protocol relatives prefixed with '//' / no scheme but has authority
         # e.g. //example.com/path
         # PRs should be able to be either http or https. In practice some sites are turning off
         # support for http, so just force everything through https
         rv = self.proxy + '/https://' + url.lstrip('/')
     elif not urisplit(url)[1] and urisplit(url)[2]: # relative / no authority but path
         # e.g. ../path
         rv = self.proxy + '/' + urijoin(self.host, url)
     elif url == '#' or (not urisplit(url)[2] and not urisplit(url)[3] and urisplit(url)[4]): # fragment
         # e.g. #id-12345
         # fragments are left alone
         rv = url
     else:
         click.echo('\n\nUnknown url protocol with url: %s' % url)
         rv = url
     return rv
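The index tests above work because uritools returns None for absent URI components (unlike urllib.parse, which returns empty strings for most of them). A minimal illustration:

from uritools import urisplit

# urisplit returns a (scheme, authority, path, query, fragment) tuple.
assert urisplit("//example.com/path")[0] is None           # no scheme
assert urisplit("//example.com/path")[1] == "example.com"  # authority present
assert urisplit("#id-12345")[4] == "id-12345"              # fragment only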
Example #7
def get_redirect(url):
    # simple case: it's already in the dict
    url = absolute_from_relative_url(url)
    if url in redirects:
        return redirects[url]

    # Try looking it up without the fragment
    defrag_url = uritools.uridefrag(url).uri
    fragment = uritools.uridefrag(url).fragment
    if fragment:
        if defrag_url in redirects:
            return uritools.urijoin(redirects[defrag_url], '#'+fragment)

    # Try fixing http/https to match the TOC
    url_parts = uritools.urisplit(url)
    base_url_parts = uritools.urisplit(redirects[base_url])
    fixed_scheme_url = uritools.uriunsplit(
        list(base_url_parts)[:1] + list(url_parts)[1:])
    if fixed_scheme_url in redirects:
        return redirects[fixed_scheme_url]

    # if same domain, try scraping it
    if url_parts.host == base_url_parts.host:
        try:
            print(f"Scraping url for get_redirect: {url}")
            scraper_result = scraper.scrape(
                url, wait_for_selector=config['post_body_selector'])
            redirects[url] = scraper_result['final_url']
            # TODO: Maybe store this scraped result in the book as well?
            return redirects[url]
        except (urllib.error.URLError, ssl.SSLError):
            return url  # TODO: Could return '' or something but for now leaving it seems fine
    # else, couldn't find it, so leave it alone.

    return url
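Note that uridefrag() returns a result with distinct .uri and .fragment attributes, which the fragment re-attachment above relies on:

from uritools import uridefrag

result = uridefrag("http://example.com/page#section-2")
assert result.uri == "http://example.com/page"
assert result.fragment == "section-2"
# When no fragment is present, .fragment is None rather than "":
assert uridefrag("http://example.com/page").fragment is None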
Example #8
 def dereference(self, uri: str) -> dict:
     """Return JSON object corresponding to resolved URI reference
     
     :param uri: URI string
     """
     reference_path = urijoin(self.scope_uri, uri)
     return self.registry.load_uri(reference_path)
Example #9
 def follow_uri(self, uri: str) -> 'Context':
     """Return new Context corresponding to scope after following uri
     
     :param uri: URI string
     """
     new_uri = urijoin(self.scope_uri, uri)
     return self.__class__(new_uri, self.registry)
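Following a relative reference rebases the scope under the usual RFC 3986 merge rules, which is what makes chained contexts resolve the way nested schema documents expect. A sketch with hypothetical URIs (Context and registry come from the surrounding codebase):

from uritools import urijoin

scope = "https://example.com/schemas/root.json"
# A relative reference resolves against the directory of the current scope...
assert urijoin(scope, "defs/common.json") == "https://example.com/schemas/defs/common.json"
# ...while an absolute reference replaces the scope entirely.
assert urijoin(scope, "https://other.org/s.json") == "https://other.org/s.json"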
Example #10
 def _get_or_create_image_file(self, path, data=None):
     what = imghdr.what(path, data)
     if not what:
         raise ValueError("Unknown image type")
     if not data:
         with open(path, "rb") as f:
             data = f.read()
     digest, width, height = hashlib.md5(data).hexdigest(), None, None
     try:
         if what == "png":
             width, height = get_image_size_png(data)
         elif what == "gif":
             width, height = get_image_size_gif(data)
         elif what == "jpeg":
             width, height = get_image_size_jpeg(data)
     except Exception as e:
         logger.error("Error getting image size for %r: %r", path, e)
     if width and height:
         name = "%s-%dx%d.%s" % (digest, width, height, what)
     else:
         name = f"{digest}.{what}"
     image_path = self._image_dir / name
     if not image_path.is_file():
         logger.info(f"Creating file {image_path.as_uri()}")
         image_path.write_bytes(data)
     return uritools.urijoin(self._base_uri, name)
Example #11
 def _get_or_create_image_file(self, path, data=None):
     what = imghdr.what(path, data)
     if not what:
         raise ValueError('Unknown image type')
     if not data:
         with open(path, 'rb') as f:
             data = f.read()
     digest, width, height = hashlib.md5(data).hexdigest(), None, None
     try:
         if what == 'png':
             width, height = get_image_size_png(data)
         elif what == 'gif':
             width, height = get_image_size_gif(data)
         elif what == 'jpeg':
             width, height = get_image_size_jpeg(data)
     except Exception as e:
         logger.error('Error getting image size for %r: %r', path, e)
     if width and height:
         name = '%s-%dx%d.%s' % (digest, width, height, what)
     else:
         name = '%s.%s' % (digest, what)
     dest = os.path.join(self.image_dir, name)
     if not os.path.isfile(dest):
         logger.info('Creating file %s', dest)
         with open(dest, 'wb') as fh:
             fh.write(data)
     return uritools.urijoin(self.base_uri, name)
Example #12
	def bootstrap(self, refSchemaTuple = tuple()):
		(id2ElemId , keyRefs , refSchemaCache) = refSchemaTuple
		
		keyList = keyRefs[self.triggerAttribute]
		errors = []
		# Saving the unique locations
		# based on information from FeatureLoc elems
		for loc in keyList:
			fk_defs = loc.context[self.triggerAttribute]
			fk_defs_gid = str(id(loc.context))
			
			#fk_defs_gid = loc.path
			for fk_loc_i, p_FK_decl in enumerate(fk_defs):
				fk_loc_id = fk_defs_gid + '_' + str(fk_loc_i)
				ref_schema_id = p_FK_decl['schema_id']
				if uritools.isabsuri(self.schemaURI):
					abs_ref_schema_id = uritools.urijoin(self.schemaURI,ref_schema_id)
				else:
					abs_ref_schema_id = ref_schema_id
				
				if abs_ref_schema_id not in refSchemaCache:
					errors.append({
						'reason': 'fk_no_schema',
						'description': "No schema with {0} id, required by {1} ({2})".format(abs_ref_schema_id,self.jsonSchemaSource,self.schemaURI)
					})
				
				fk_members = p_FK_decl.get('members',[])
				fkLoc = FKLoc(schemaURI=self.schemaURI,refSchemaURI=abs_ref_schema_id,path=loc.path+'/'+str(fk_loc_i),values=list())
				fk_id = abs_ref_schema_id
				fkDefH = self.FKWorld.setdefault(fk_id,{})
				
				# This check is here because the same primary key can be referenced from multiple places
				fkDefH[fk_loc_id] = FKDef(fkLoc=fkLoc,members=fk_members)
		
		return errors
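The isabsuri() guard above matters because resolving against a non-absolute base cannot produce an absolute schema id. A minimal illustration with hypothetical URIs:

from uritools import isabsuri, urijoin

assert isabsuri("https://example.com/schema.json")   # has a scheme: absolute
assert not isabsuri("relative/schema.json")          # no scheme: not absolute
# Only with an absolute base does the join yield an absolute reference:
assert urijoin("https://example.com/a/b.json", "c.json") == "https://example.com/a/c.json"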
Example #13
        def recursive_traversal(
                schema_object: Union[Dict[str, Any],
                                     List[Any]]) -> Dict[str, Any]:
            retrieved_object = None

            if isinstance(schema_object, dict):
                for k, v in schema_object.items():
                    if isinstance(v, dict) or isinstance(v, list):
                        recursive_traversal(v)
                    elif k == "$ref":
                        # References into processes/ are deliberately left unresolved here
                        if v.startswith("processes/"):
                            pass
                        else:
                            if not uritools.urisplit(v).gethost():
                                v = uritools.urijoin(url, v)
                            retrieved_object = self._resolve_schema(v)

                if retrieved_object is not None:
                    schema_object.update(retrieved_object)
                    del schema_object["$ref"]
            if isinstance(schema_object, list):
                for i in schema_object:
                    if isinstance(i, dict) or isinstance(i, list):
                        recursive_traversal(i)
Example #14
 def _cleanup_images(self):
     logger.info("Cleaning up image directory")
     with self._connect() as c:
         uris = set(schema.get_image_uris(c))
     for image_path in self._image_dir.glob("**/*"):
         if uritools.urijoin(self._base_uri, image_path.name) not in uris:
             logger.info(f"Deleting file {image_path.as_uri()}")
             image_path.unlink()
Example #15
 def get_or_create_image_file(self, path, data=None):
     what = imghdr.what(path, data)
     if not what:
         raise ValueError('Unknown image type')
     if not data:
         with open(path, 'rb') as f:
             data = f.read()
     name = hashlib.md5(data).hexdigest() + '.' + what
     path = os.path.join(self.image_dir, name)
     get_or_create_file(str(path), True, data)
     return uritools.urijoin(self.base_uri, name)
Example #16
    def _get(self, path, params=None):
        url = uritools.urijoin(self.base_url, path)
        retries = self.retries
        timeout = self.timeout

        while True:
            try:
                return self.session.get(url, params=params, timeout=timeout)
            except requests.exceptions.ConnectionError as e:
                if not retries:
                    raise
                logger.warning('Error connecting to the Internet Archive: %s', e)
                retries -= 1
Example #17
    def cleanup(self):
        logger.info('Cleaning up image directory')
        uris = set()
        for track in self.library.begin():
            if track.album and track.album.images:
                uris.update(track.album.images)
        self.library.close()

        for root, _, files in os.walk(self.image_dir):
            for name in files:
                if uritools.urijoin(self.base_uri, name) not in uris:
                    path = os.path.join(root, name)
                    logger.info('Deleting file %s', path)
                    os.remove(path)
Example #18
    def _cleanup(self):
        logger.info('Cleaning up image directory')
        uris = set()
        for track in self.library.begin():
            if track.album and track.album.images:
                uris.update(track.album.images)
        self.library.close()

        for root, _, files in os.walk(self.image_dir):
            for name in files:
                if uritools.urijoin(self.base_uri, name) not in uris:
                    path = os.path.join(root, name)
                    logger.info('Deleting file %s', path)
                    os.remove(path)
Example #19
    def build_ref(type_entry):
        if type_entry["ns"] == "common":
            defs_path = common_config["defs_path"]
        elif type_entry["ns"] == "cbr_process":
            defs_path = cbr_process_config["defs_path"]
        else:
            # Guard against defs_path being unbound for an unexpected namespace
            raise ValueError("Unknown namespace: " + type_entry["ns"])

        return uritools.urijoin(
            _build_rel_path(
                os.path.join(cbr_process_config["cbr_process_collection_dir"],
                             "dummy"),
                defs_path,
                build_params["root_build_dir"],
            ),
            f"#{type_entry['def']}",
        )
Example #20
def findFKs(jsonSchema, jsonSchemaURI, prefix=""):
    FKs = []

    if isinstance(jsonSchema, dict):
        # First, this level's foreign keys
        isArray = False

        if 'items' in jsonSchema and isinstance(jsonSchema['items'], dict):
            jsonSchema = jsonSchema['items']
            isArray = True

            if prefix != '':
                prefix += '[]'

        if 'foreign_keys' in jsonSchema and isinstance(
                jsonSchema['foreign_keys'], (list, tuple)):
            for fk_def in jsonSchema['foreign_keys']:
                # Only valid declarations are taken into account
                if isinstance(
                        fk_def, dict
                ) and 'schema_id' in fk_def and 'members' in fk_def:
                    ref_schema_id = fk_def['schema_id']
                    members = fk_def['members']

                    if isinstance(members, (list, tuple)):
                        # Translating to absolute URI (in case it is relative)
                        abs_ref_schema_id = uritools.urijoin(
                            jsonSchemaURI, ref_schema_id)

                        # Translating the paths
                        components = tuple(
                            map(
                                lambda component: prefix + '.' + component
                                if component not in ['.', ''] else prefix,
                                members))

                        FKs.append((abs_ref_schema_id, components))

        # Then, the foreign keys inside sublevels
        if 'properties' in jsonSchema and isinstance(jsonSchema['properties'],
                                                     dict):
            if prefix != '':
                prefix += '.'
            p = jsonSchema['properties']
            for k, subSchema in p.items():
                FKs.extend(findFKs(subSchema, jsonSchemaURI, prefix + k))

    return FKs
Example #21
    def crawl(self, pages, depth=2):
        # iterate for each depth
        for i in range(depth):
            # use a set to prevent repeats
            newpages = set()

            # for each page in pages list
            for page in pages:
                c = None
                try:
                    c = urllib2.urlopen(page)
                except Exception:
                    print("Could not open {}".format(page))

                if not c:
                    continue

                # after retrieving the html
                soup = BeautifulSoup(c.read(), 'html.parser')

                # index page (as in add all the words into the words table)
                self.addtoindex(page, soup)

                # iterate through all the links in the page
                links = soup('a')
                for link in links:
                    if ('href' in dict(link.attrs)):
                        url = urijoin(page, link['href'])

                        # check for quotes
                        if url.find("'") != -1: continue

                        # remove location fragments
                        url = url.split('#')[0]

                        # is the result a valid url
                        if url[0:4] == 'http' and not self.isindexed(url):
                            newpages.add(url)
                        # create a link between the two pages
                        linkText = self.gettextonly(link)
                        self.addlinkref(page, url, linkText)

                # store the db
                self.dbcommit()

            # recurse
            pages = newpages
Example #22
    def custom_patterns(parser, results):
        a_tags = []
        elements = cxpath_href(parser.doc)
        for element in elements:
            href = element.get('href')

            # skip links to invalid hrefs
            if not href:
                continue
            if href.startswith('javascript:'):
                continue

            # canonicalize the href
            href = uritools.urijoin(parser.url, href)
            href = uritools.uridefrag(href).uri
            href = clean_url(href)
            try:
                href = url_normalize.url_normalize(href)
            except Exception as e:
                logging.debug('failed to normalize url ' + href)
            a_tag = {'href': href}

            # extract the rel
            if element.get('rel') is not None:
                rel = element.get('rel').strip()
                if len(rel) > 0:
                    a_tag['rel'] = rel

            # extract the text_content
            if element.text_content() is not None:
                text_content = element.text_content().strip()
                if len(text_content) > 0:
                    a_tag['text_content'] = text_content

            # add to list
            a_tags.append(a_tag)

        # remove duplicate tags
        a_tags.sort(key=lambda x: x['href'])
        a_tags_dedup = []
        prev_href = None
        for a_tag in a_tags:
            if a_tag['href'] != prev_href:
                prev_href = a_tag['href']
                a_tags_dedup.append(a_tag)

        results.append({'value': a_tags_dedup})
Example #23
    def find_linked_extras(posts):
        extra_page_urls = []
        for post in posts:
            for body_soup in post['body_soups']:
                for element in body_soup.select('[href]'):
                    full_href = uritools.urijoin(
                        post['final_url'], element['href'])
                    defragged_href = uritools.uridefrag(full_href).uri

                    if not url_is_included(defragged_href):
                        href_parts = uritools.urisplit(full_href)
                        base_url_parts = uritools.urisplit(redirects[base_url])
                        if href_parts.host == base_url_parts.host:  # Never try to include linked pages from other domains
                            if defragged_href not in extra_page_urls:
                                # TODO: defragged, or full? Uniqueness or is the fragment important?
                                extra_page_urls.append(defragged_href)
        return extra_page_urls
Example #24
	def _traverse_dict(schemaURI, j, jp="", fragment=None):
		# Pre-processing
		newPartialSchemaURI = j.get('$id')
		if newPartialSchemaURI:
			# Computing the absolute schema URI
			if uritools.isabsuri(schemaURI):
				newSchemaURI , uriFragment = uritools.uridefrag(uritools.urijoin(schemaURI,newPartialSchemaURI))
			else:
				newSchemaURI , uriFragment = uritools.uridefrag(newPartialSchemaURI)
		else:
			newSchemaURI = schemaURI
		
		# Are we jumping to a different place?
		if newSchemaURI == schemaURI:
			theId = id(j)
			theIdStr = str(theId)
			
			# Does the dictionary contain a '$ref'?
			isRef = REF_FEATURE in j
			
			for k,v in j.items():
				# Following JSON reference standards, we have to
				# ignore other keys when there is a $ref one
				# https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03#section-3
				if isRef and (k != REF_FEATURE):
					continue
				
				elemId = theIdStr + ':' + k
				
				elemPath = jp + '/' + k
				jp2val[elemPath] = elemId
				
				# Is the key among the "special ones"?
				if k in keySet:
					# Saving the correspondence from Python address
					# to unique id of the feature
					id2ElemId.setdefault(theId,{})[k] = [ elemId ]
					keyRefs.setdefault(k,[]).append(FeatureLoc(schemaURI=schemaURI,fragment=fragment,path=elemPath,context=j,id=elemId))
				
				if isinstance(v,dict):
					# Fragment must not be propagated to children
					_traverse_dict(schemaURI,v,jp=elemPath)
				elif isinstance(v,list):
					_traverse_list(schemaURI,v,jp=elemPath)
		else:
			traverseJSONSchema(j,schemaURI=newSchemaURI,fragment=uriFragment,keys=keys,refSchemaListSet=refSchemaListSet)
Example #25
def _build_abs_schema_uri(rel_path: str, cb_schemas_base_url: str) -> str:
    """
    Form an absolute URI from a relative path.

    It takes the :code:`common.cb_schemas_base_url` property provided by the build
    configuration file.

    :param str rel_path: A path relative to :code:`build_params.root_build_dir`.
    :param str cb_schemas_base_url: The base URL pointing to the directory where the
      Cookbase Schemas are to be located.

    :return: An absolute URI intended to unambiguously refer to the given path.
    :rtype: str
    """
    base_url_splits = uritools.urisplit(cb_schemas_base_url)
    abs_path = base_url_splits.path or "/"

    return uritools.urijoin(cb_schemas_base_url,
                            os.path.join(abs_path, rel_path))
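The explicit path join guards against plain urijoin() dropping the last segment of a base URL that lacks a trailing slash. Roughly (URLs hypothetical):

import os
from uritools import urijoin, urisplit

base = "https://example.com/schemas"   # note: no trailing slash
rel_path = "cb/recipe.json"
# Plain urijoin would discard the "schemas" segment:
assert urijoin(base, rel_path) == "https://example.com/cb/recipe.json"
# Joining onto the base's own path first keeps it:
abs_path = urisplit(base).path or "/"  # "/schemas"
assert urijoin(base, os.path.join(abs_path, rel_path)) == \
    "https://example.com/schemas/cb/recipe.json"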
Example #26
def meta_tags(url, timeout=15, html=None):
    if not html:
        html = get(url, timeout=timeout).text

    d = pq(html)
    favicon = d('link[rel="shortcut icon"]').attr("href")
    if not favicon:
        favicon_url = urijoin(url, "/favicon.ico")
        r = requests.head(favicon_url)
        if r.status_code == 200:
            favicon = favicon_url

    return {
        "title": d('meta[name="title"]').attr("content") or d("title").text(),
        "description": d('meta[name="description"]').attr("content"),
        "image": d('meta[name="image"]').attr("content"),
        "favicon": favicon,
        "url": d('meta[name="canonical"]').attr("content")
        or d('meta[name="url"]').attr("content"),
        "keywords": d('meta[name="keywords"]').attr("content"),
    }
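The favicon fallback works because a root-relative reference replaces the whole path of the page URL, whatever page was fetched:

from uritools import urijoin

page = "https://example.com/blog/post/123"   # hypothetical page URL
assert urijoin(page, "/favicon.ico") == "https://example.com/favicon.ico"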
Example #27
    def _delete_test_dag_ids(self, remote_uri: str,
                             test_dag_ids: List[str]) -> Dict[str, str]:
        """
        Calls an endpoint on Airflow where the etsy-test-api service is active.
        This call will delete test_dag_ids from the airflow DB.
        """
        delete_uri = uritools.urijoin(remote_uri, '/etsy-test-api/delete')
        response = requests.post(delete_uri,
                                 data=json.dumps({'test_dags': test_dag_ids}),
                                 headers={'Content-Type': 'application/json'},
                                 timeout=600)

        results = {}
        if response.status_code == 200:
            try:
                results = response.json()
            except JSONDecodeError as e:
                logging.info(
                    'Received JSONDecodeError on call to {}. Full error: {}'
                    .format(delete_uri, e))
        return results
Example #28
    def validate(self, validator, fp_def, value, schema):
        if fp_def and isinstance(fp_def, str):
            fp_loc_id = id(schema)

            # Getting the absolute schema id and the route
            if uritools.isabsuri(self.schemaURI):
                abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(
                    uritools.urijoin(self.schemaURI, fp_def))
            else:
                abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(
                    fp_def)
            fpDef = self.FPWorld.setdefault(abs_ref_schema_id,
                                            {}).get(fp_loc_id)

            # And getting the foreign property definition
            if fpDef is None:
                fpDef = FPDef(schemaURI=self.schemaURI,
                              refSchemaURI=abs_ref_schema_id,
                              path='(unknown {})'.format(fp_loc_id),
                              refPath=rel_json_pointer,
                              values=list())
                self.FPWorld[abs_ref_schema_id][fp_loc_id] = fpDef

            obtainedValues = [(value, )]

            isAtomicValue = len(obtainedValues) == 1 and len(
                obtainedValues[0]) == 1 and isinstance(
                    obtainedValues[0][0], ALLOWED_ATOMIC_VALUE_TYPES)

            if isAtomicValue:
                theValues = [obtainedValues[0][0]]
            else:
                theValues = UniqueKey.GenKeyStrings(obtainedValues)

            fpVals = fpDef.values

            # Second pass will do the validation
            for theValue in theValues:
                fpVals.append(FPVal(where=self.currentJSONFile,
                                    value=theValue))
Example #29
    def bootstrap(self, refSchemaTuple=tuple()):
        (id2ElemId, keyRefs, refSchemaCache) = refSchemaTuple

        keyList = keyRefs[self.triggerAttribute]
        errors = []
        # Saving the unique locations
        # based on information from FeatureLoc elems
        for loc in keyList:
            fp_def = loc.context[self.triggerAttribute]
            fp_loc_id = id(loc.context)

            # Getting the absolute schema id and the route
            if uritools.isabsuri(self.schemaURI):
                abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(
                    uritools.urijoin(self.schemaURI, fp_def))
            else:
                abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(
                    fp_def)

            if abs_ref_schema_id not in refSchemaCache:
                errors.append({
                    'reason':
                    'fp_no_schema',
                    'description':
                    "No schema with {0} id, required by {1} ({2})".format(
                        abs_ref_schema_id, self.jsonSchemaSource,
                        self.schemaURI)
                })

            fpDefH = self.FPWorld.setdefault(abs_ref_schema_id, {})

            # This check is here because the same primary key can be referenced from multiple places
            fpDefH[fp_loc_id] = FPDef(schemaURI=self.schemaURI,
                                      refSchemaURI=abs_ref_schema_id,
                                      path=loc.path,
                                      refPath=rel_json_pointer,
                                      values=list())

        return errors
Example #30
	def validate(self,validator,fk_defs,value,schema):
		if fk_defs and isinstance(fk_defs,(list,tuple)):
			fk_defs_gid = str(id(schema))
			for fk_loc_i, p_FK_decl in enumerate(fk_defs):
				fk_loc_id = fk_defs_gid + '_' + str(fk_loc_i)
				ref_schema_id = p_FK_decl['schema_id']
				if uritools.isabsuri(self.schemaURI):
					abs_ref_schema_id = uritools.urijoin(self.schemaURI,ref_schema_id)
				else:
					abs_ref_schema_id = ref_schema_id
				
				fk_members = p_FK_decl.get('members',[])
				if isinstance(fk_members,list):
					obtainedValues = PrimaryKey.GetKeyValues(value,fk_members)
				else:
					obtainedValues = [(value,)]
				
				isAtomicValue = len(obtainedValues) == 1 and len(obtainedValues[0]) == 1 and isinstance(obtainedValues[0][0], ALLOWED_ATOMIC_VALUE_TYPES)
				
				if isAtomicValue:
					theValues = [ obtainedValues[0][0] ]
				else:
					theValues = PrimaryKey.GenKeyStrings(obtainedValues)
				
				# Group the values to be checked
				#fk_id = id(p_FK_decl)  # id(schema)
				fk_id = abs_ref_schema_id
				
				# The common dictionary for this declaration where all the FK values are kept
				fkDef = self.FKWorld.setdefault(fk_id,{}).setdefault(fk_loc_id,FKDef(fkLoc=FKLoc(schemaURI=self.schemaURI,refSchemaURI=abs_ref_schema_id,path='(unknown {})'.format(fk_loc_id),values=list()),members=fk_members))
				
				fkLoc = fkDef.fkLoc
				
				fkVals = fkLoc.values
				
				# Second pass will do the validation
				for theValue in theValues:
					fkVals.append(FKVal(where=self.currentJSONFile,value=theValue))
Example #31
def parse_lecture(lect_link, name):
    lect_html = requests.get(urijoin(base, lect_link.get("href"))).text
    lect = BeautifulSoup(lect_html, features="html.parser")
    lect_times = (lect.find(
        "div",
        class_="tudbox").find(lambda tag: tag.name == "td" and "Zeit/Ort:" in
                              tag.text).find_next_sibling("td").text.strip())

    dates = [
        dict(
            name=lect_link.text,
            vert_name=name,
            week=date["week"],
            time=date["time"],
            day=date["day"],
            type="lect",
        ) for date in parse_dates(lect_times)
    ]

    tuts = None
    tuts_row = lect.find("div", class_="tudbox").find(
        lambda tag: tag.name == "td" and "Übungen:" in tag.text)

    if tuts_row:
        dates += [
            dict(
                name=lect_link.text,
                vert_name=name,
                date=date,
                week=date["week"],
                time=date["time"],
                day=date["day"],
                type="tut",
            ) for date in parse_dates(
                list(tuts_row.find_next_sibling("td").findAll("td"))[-1].text)
        ]

    return dates
Example #32
    def __init__(
        self,
        mongodb_url: str,
        db_type: str = DBTypes.MONGODB,
        db_name: str = "cookbase",
    ):
        """Constructor method."""
        if db_type == self.DBTypes.MONGODB:
            self._default_db_id: str = f"{db_type}:{db_name}"

            try:
                client = pymongo.MongoClient(uritools.urijoin(mongodb_url, db_name))
                client.admin.command("ismaster")
            except pymongo.errors.PyMongoError:
                import sys

                raise DBClientConnectionError(self._default_db_id).with_traceback(
                    sys.exc_info()[2]
                )

            self._default_db: Any = client[db_name]
            self._connections: Dict[str, Any] = {self._default_db_id: self._default_db}
        else:
            raise InvalidDBTypeError(db_type)
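Using uritools here rather than urllib.parse matters: urijoin() follows RFC 3986 for any scheme, whereas urllib.parse.urljoin returns the reference unchanged for schemes outside its uses_relative list (mongodb among them). A sketch with a hypothetical connection URL:

from uritools import urijoin

assert urijoin("mongodb://localhost:27017/", "cookbase") == "mongodb://localhost:27017/cookbase"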
Example #33
 def _get_test_dag_ids(self,
                       remote_uri: str,
                       ttl_days: int = 7) -> List[str]:
     """
     Calls an endpoint on Airflow where the etsy-test-api service is active.
     If the service does not exist (e.g., on production airflows), this call
     will return an empty list.
     """
     list_uri = uritools.urijoin(
         remote_uri,
         '/etsy-test-api/list_test_dags?ttl_days={}'.format(ttl_days))
     response = requests.get(list_uri, timeout=10)
     test_dag_ids = []
     if response.status_code == 200:
         try:
             payload = response.json()
              test_dag_ids = payload.get('test_dags', [])
         except JSONDecodeError as _e:
             logging.info(
                 'Received JSONDecodeError on call to {}.'.format(list_uri))
             logging.info(
                 'Note: This is okay on Production airflow instances.')
     return test_dag_ids
Example #34
 def geturi(self, filename):
     return uritools.urijoin(self._base_uri, filename)
Example #35
 def geturl(self, identifier, filename=None):
     if filename:
         path = identifier + '/' + uritools.uriencode(filename)
     else:
         path = identifier + '/'
     return uritools.urijoin(self.base_url, '/download/' + path)
Example #36
def to_canonical_uri(current_base_uri, uri):
    return urijoin(current_base_uri, uri)
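As a final reference point, canonicalization via urijoin() follows the standard RFC 3986 resolution rules (base URI hypothetical):

from uritools import urijoin

base = "https://example.com/api/v1/resource"
assert urijoin(base, "other") == "https://example.com/api/v1/other"            # sibling
assert urijoin(base, "../v2/resource") == "https://example.com/api/v2/resource"
assert urijoin(base, "") == base   # empty reference resolves to the base itself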