def _populate_uri_to_validator(
    validator: AValidator,
    root_base_uri,
    uri_to_validator: Dict,
):
    """
    This needs to be run after _set_to_canonical_uri, because
    _set_to_canonical_uri propagates the base URIs through the subschemas.

    Populates the uri-to-validator mapping. The URIs are of three types:

    - canonical id
    - canonical id + location
    - canonical id + anchor
    """
    if (
        isinstance(validator, (Keyword, AValidator))
        and not isinstance(validator, KeywordGroup)
        and validator.location
    ):
        uri_to_validator[urijoin(root_base_uri, "#" + validator.location)] = validator

    if validator.id is not None and isinstance(validator, Validator):
        validator_id = validator.id.rstrip("/")
        uri_to_validator[validator_id] = validator

    if validator.anchor:
        uri_to_validator[
            urijoin(validator.base_uri or "", validator.anchor)
        ] = validator

    for sub_validator in validator.sub_validators():
        _populate_uri_to_validator(
            validator=sub_validator,
            root_base_uri=root_base_uri,
            uri_to_validator=uri_to_validator,
        )
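# Hedged illustration (not part of the original module; assumes urijoin here is
# uritools.urijoin): how the "canonical id + location" and "canonical id + anchor"
# keys are formed, for a hypothetical canonical id "https://example.com/schema",
# location "/properties/name" and anchor "#name".
if __name__ == "__main__":
    from uritools import urijoin as _urijoin

    print(_urijoin("https://example.com/schema", "#/properties/name"))
    # expected: https://example.com/schema#/properties/name  (canonical id + location)
    print(_urijoin("https://example.com/schema", "#name"))
    # expected: https://example.com/schema#name              (canonical id + anchor)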
def _make_podcast_uri(self, xml, ep_guid=None):
    if not xml:
        return None
    # Feeds use main URL, not localized
    baseurl = ivooxapi.get_baseurl()
    program = uritools.urijoin(baseurl, xml)
    episode = ('#' + uritools.urijoin(baseurl, ep_guid)) if ep_guid else ''
    return 'podcast+{}{}'.format(program, episode)
def check(self, base, ref, expected, strict=False):
    self.assertEqual(expected, urijoin(base, ref, strict))
    # base as bytes, ref as str
    self.assertEqual(expected, urijoin(base.encode(), ref, strict))
    # base as str, ref as bytes
    self.assertEqual(expected, urijoin(base, ref.encode(), strict))
    # both base and ref as bytes
    self.assertEqual(expected.encode(), urijoin(base.encode(), ref.encode(), strict))
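# Hedged usage sketch (not part of the original test): the helper above could be
# exercised with the normal reference-resolution examples from RFC 3986,
# section 5.4.1, assuming it lives on a unittest.TestCase:
#
#   self.check("http://a/b/c/d;p?q", "g", "http://a/b/c/g")
#   self.check("http://a/b/c/d;p?q", "../g", "http://a/b/g")
#   self.check("http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y")
#   self.check("http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s")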
def wrap_response(url, data, method):
    title = ("title" in data and data["title"]) or ""
    image = ("image" in data and data["image"]) or ""
    favicon = ("favicon" in data and data["favicon"]) or ""
    description = ("description" in data and data["description"]) or ""
    url = ("url" in data and data["url"]) or url
    css = ("css" in data and data["css"]) or ""

    if image:
        image = urijoin(url, image)
    if favicon:
        favicon = urijoin(url, favicon)

    domain = urisplit(url).authority
    if "domain" in data and data["domain"]:
        domain = data["domain"]

    site = title  # use the already-extracted title; avoids a KeyError when "title" is absent
    if "site_name" in data and data["site_name"]:
        site = data["site_name"]
    if "provider_name" in data and data["provider_name"]:
        site = data["provider_name"]

    if method == "oembed" or (method == "custom" and "html" in data and data["html"]):
        html = data["html"]
    else:
        if image:
            html = template("unfurl-image", url, image, title, description, domain)
        elif favicon:
            html = template(
                "unfurl-image unfurl-favicon", url, favicon, title, description, domain
            )
        else:
            html = template(
                "unfurl-image unfurl-default",
                url,
                "https://i.imgur.com/wQ37ilJ.png",
                title,
                description,
                domain,
            )

    return {
        "method": method,
        "site": site,
        "domain": domain,
        "title": title,
        "description": description,
        "image": image,
        "favicon": favicon,
        "url": url,
        "type": "rich",
        "html": html,
        "css": css,
    }
def url_fix(self, url):
    if url == "":
        # Null case
        rv = url
    elif url.startswith('data:'):
        # data URI, not actually a link, leave it alone
        # e.g. data:text/html,<script>alert('hi');</script>
        rv = url
    elif urisplit(url)[0]:
        # absolute / has scheme
        # e.g. http://example.com/path
        rv = self.proxy + '/' + url
    elif urisplit(url)[1]:
        # protocol-relative, prefixed with '//' / no scheme but has authority
        # e.g. //example.com/path
        # PRs should be able to be either http or https. In practice some sites are
        # turning off support for http, so just force everything through https.
        rv = self.proxy + '/https://' + url.lstrip('/')
    elif not urisplit(url)[1] and urisplit(url)[2]:
        # relative / no authority but has a path
        # e.g. ../path
        rv = self.proxy + '/' + urijoin(self.host, url)
    elif url == '#' or (not urisplit(url)[2] and not urisplit(url)[3] and urisplit(url)[4]):
        # fragment
        # e.g. #id-12345
        # fragments are left alone
        rv = url
    else:
        click.echo('\n\nUnknown url protocol with url: %s' % url)
        rv = url
    return rv
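# Hedged sketch (not from the original class; the proxy and host values are made
# up): with self.proxy == "https://proxy.example" and
# self.host == "https://site.example/articles/page.html", the branches above
# would be expected to rewrite URLs roughly like this:
#
#   "http://other.example/x"  -> "https://proxy.example/http://other.example/x"
#   "//cdn.example/lib.js"    -> "https://proxy.example/https://cdn.example/lib.js"
#   "../img/logo.png"         -> "https://proxy.example/" +
#                                urijoin("https://site.example/articles/page.html",
#                                        "../img/logo.png")
#                                # i.e. ".../https://site.example/img/logo.png"
#   "#section-2"              -> "#section-2"   (fragments are left alone)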
def get_redirect(url):
    # simple case: it's already in the dict
    url = absolute_from_relative_url(url)
    if url in redirects:
        return redirects[url]

    # Try looking it up without the fragment
    defrag_url = uritools.uridefrag(url).uri
    fragment = uritools.uridefrag(url).fragment
    if fragment:
        if defrag_url in redirects:
            return uritools.urijoin(redirects[defrag_url], '#' + fragment)

    # Try fixing http/https to match the TOC
    url_parts = uritools.urisplit(url)
    base_url_parts = uritools.urisplit(redirects[base_url])
    fixed_scheme_url = uritools.uriunsplit(
        list(base_url_parts)[:1] + list(url_parts)[1:])
    if fixed_scheme_url in redirects:
        return redirects[fixed_scheme_url]

    # if same domain, try scraping it
    if url_parts.host == base_url_parts.host:
        try:
            print(f"Scraping url for get_redirect: {url}")
            scraper_result = scraper.scrape(
                url, wait_for_selector=config['post_body_selector'])
            redirects[url] = scraper_result['final_url']
            # TODO: Also store this scraped result in the book?
            return redirects[url]
        except (urllib.error.URLError, ssl.SSLError):
            return url  # TODO: Could return '' or something, but for now leaving it seems fine

    # else, couldn't find it, so leave it alone.
    return url
def dereference(self, uri: str) -> dict:
    """Return JSON object corresponding to resolved URI reference

    :param uri: URI string
    """
    reference_path = urijoin(self.scope_uri, uri)
    return self.registry.load_uri(reference_path)
def follow_uri(self, uri: str) -> 'Context':
    """Return new Context corresponding to scope after following uri

    :param uri: URI string
    """
    new_uri = urijoin(self.scope_uri, uri)
    return self.__class__(new_uri, self.registry)
def _get_or_create_image_file(self, path, data=None):
    what = imghdr.what(path, data)
    if not what:
        raise ValueError("Unknown image type")
    if not data:
        with open(path, "rb") as f:
            data = f.read()
    digest, width, height = hashlib.md5(data).hexdigest(), None, None
    try:
        if what == "png":
            width, height = get_image_size_png(data)
        elif what == "gif":
            width, height = get_image_size_gif(data)
        elif what == "jpeg":
            width, height = get_image_size_jpeg(data)
    except Exception as e:
        logger.error("Error getting image size for %r: %r", path, e)
    if width and height:
        name = "%s-%dx%d.%s" % (digest, width, height, what)
    else:
        name = f"{digest}.{what}"
    image_path = self._image_dir / name
    if not image_path.is_file():
        logger.info(f"Creating file {image_path.as_uri()}")
        image_path.write_bytes(data)
    return uritools.urijoin(self._base_uri, name)
def _get_or_create_image_file(self, path, data=None):
    what = imghdr.what(path, data)
    if not what:
        raise ValueError('Unknown image type')
    if not data:
        # read as bytes so hashing and the binary write below work consistently
        with open(path, 'rb') as fh:
            data = fh.read()
    digest, width, height = hashlib.md5(data).hexdigest(), None, None
    try:
        if what == 'png':
            width, height = get_image_size_png(data)
        elif what == 'gif':
            width, height = get_image_size_gif(data)
        elif what == 'jpeg':
            width, height = get_image_size_jpeg(data)
    except Exception as e:
        logger.error('Error getting image size for %r: %r', path, e)
    if width and height:
        name = '%s-%dx%d.%s' % (digest, width, height, what)
    else:
        name = '%s.%s' % (digest, what)
    dest = os.path.join(self.image_dir, name)
    if not os.path.isfile(dest):
        logger.info('Creating file %s', dest)
        with open(dest, 'wb') as fh:
            fh.write(data)
    return uritools.urijoin(self.base_uri, name)
def bootstrap(self, refSchemaTuple=tuple()):
    (id2ElemId, keyRefs, refSchemaCache) = refSchemaTuple

    keyList = keyRefs[self.triggerAttribute]
    errors = []
    # Saving the unique locations
    # based on information from FeatureLoc elems
    for loc in keyList:
        fk_defs = loc.context[self.triggerAttribute]
        fk_defs_gid = str(id(loc.context))
        # fk_defs_gid = loc.path

        for fk_loc_i, p_FK_decl in enumerate(fk_defs):
            fk_loc_id = fk_defs_gid + '_' + str(fk_loc_i)
            ref_schema_id = p_FK_decl['schema_id']
            if uritools.isabsuri(self.schemaURI):
                abs_ref_schema_id = uritools.urijoin(self.schemaURI, ref_schema_id)
            else:
                abs_ref_schema_id = ref_schema_id

            if abs_ref_schema_id not in refSchemaCache:
                errors.append({
                    'reason': 'fk_no_schema',
                    'description': "No schema with {0} id, required by {1} ({2})".format(
                        abs_ref_schema_id, self.jsonSchemaSource, self.schemaURI)
                })

            fk_members = p_FK_decl.get('members', [])
            fkLoc = FKLoc(
                schemaURI=self.schemaURI,
                refSchemaURI=abs_ref_schema_id,
                path=loc.path + '/' + str(fk_loc_i),
                values=list())
            fk_id = abs_ref_schema_id
            fkDefH = self.FKWorld.setdefault(fk_id, {})

            # This control is here for the case where the same primary key
            # is referenced from multiple places
            fkDefH[fk_loc_id] = FKDef(fkLoc=fkLoc, members=fk_members)

    return errors
def recursive_traversal(
        schema_object: Union[Dict[str, Any], List[Any]]) -> Dict[str, Any]:
    retrieved_object = None
    if isinstance(schema_object, dict):
        for k, v in schema_object.items():
            if isinstance(v, dict) or isinstance(v, list):
                recursive_traversal(v)
            elif k == "$ref":
                # Processes case
                if v.startswith("processes/"):
                    pass
                else:
                    if not uritools.urisplit(v).gethost():
                        v = uritools.urijoin(url, v)
                    retrieved_object = self._resolve_schema(v)
        if retrieved_object is not None:
            schema_object.update(retrieved_object)
            del schema_object["$ref"]
    if isinstance(schema_object, list):
        for i in schema_object:
            if isinstance(i, dict) or isinstance(i, list):
                recursive_traversal(i)
def _cleanup_images(self):
    logger.info("Cleaning up image directory")
    with self._connect() as c:
        uris = set(schema.get_image_uris(c))
    for image_path in self._image_dir.glob("**/*"):
        if uritools.urijoin(self._base_uri, image_path.name) not in uris:
            logger.info(f"Deleting file {image_path.as_uri()}")
            image_path.unlink()
def get_or_create_image_file(self, path, data=None):
    what = imghdr.what(path, data)
    if not what:
        raise ValueError('Unknown image type')
    if not data:
        # read as bytes so the digest is computed on the raw file contents
        with open(path, 'rb') as fh:
            data = fh.read()
    name = hashlib.md5(data).hexdigest() + '.' + what
    path = os.path.join(self.image_dir, name)
    get_or_create_file(str(path), True, data)
    return uritools.urijoin(self.base_uri, name)
def _get(self, path, params=None):
    url = uritools.urijoin(self.base_url, path)
    retries = self.retries
    timeout = self.timeout
    while True:
        try:
            return self.session.get(url, params=params, timeout=timeout)
        except requests.exceptions.ConnectionError as e:
            if not retries:
                raise e
            logger.warn('Error connecting to the Internet Archive: %s', e)
            retries -= 1
def cleanup(self):
    logger.info('Cleaning up image directory')
    uris = set()
    for track in self.library.begin():
        if track.album and track.album.images:
            uris.update(track.album.images)
    self.library.close()
    for root, _, files in os.walk(self.image_dir):
        for name in files:
            if uritools.urijoin(self.base_uri, name) not in uris:
                path = os.path.join(root, name)
                logger.info('Deleting file %s', path)
                os.remove(path)
def _cleanup(self):
    logger.info('Cleaning up image directory')
    uris = set()
    for track in self.library.begin():
        if track.album and track.album.images:
            uris.update(track.album.images)
    self.library.close()
    for root, _, files in os.walk(self.image_dir):
        for name in files:
            if uritools.urijoin(self.base_uri, name) not in uris:
                path = os.path.join(root, name)
                logger.info('Deleting file %s', path)
                os.remove(path)
def build_ref(type_entry):
    if type_entry["ns"] == "common":
        defs_path = common_config["defs_path"]
    elif type_entry["ns"] == "cbr_process":
        defs_path = cbr_process_config["defs_path"]
    return uritools.urijoin(
        _build_rel_path(
            os.path.join(cbr_process_config["cbr_process_collection_dir"], "dummy"),
            defs_path,
            build_params["root_build_dir"],
        ),
        f"#{type_entry['def']}",
    )
def findFKs(jsonSchema, jsonSchemaURI, prefix=""):
    FKs = []

    if isinstance(jsonSchema, dict):
        # First, this level's foreign keys
        isArray = False

        if 'items' in jsonSchema and isinstance(jsonSchema['items'], dict):
            jsonSchema = jsonSchema['items']
            isArray = True

            if prefix != '':
                prefix += '[]'

        if 'foreign_keys' in jsonSchema and isinstance(jsonSchema['foreign_keys'], (list, tuple)):
            for fk_def in jsonSchema['foreign_keys']:
                # Only valid declarations are taken into account
                if isinstance(fk_def, dict) and 'schema_id' in fk_def and 'members' in fk_def:
                    ref_schema_id = fk_def['schema_id']
                    members = fk_def['members']

                    if isinstance(members, (list, tuple)):
                        # Translating to absolute URI (in case it is relative)
                        abs_ref_schema_id = uritools.urijoin(jsonSchemaURI, ref_schema_id)

                        # Translating the paths
                        components = tuple(
                            map(
                                lambda component: prefix + '.' + component
                                if component not in ['.', ''] else prefix,
                                members))

                        FKs.append((abs_ref_schema_id, components))

        # Then, the foreign keys inside sublevels
        if 'properties' in jsonSchema and isinstance(jsonSchema['properties'], dict):
            if prefix != '':
                prefix += '.'
            p = jsonSchema['properties']
            for k, subSchema in p.items():
                FKs.extend(findFKs(subSchema, jsonSchemaURI, prefix + k))

    return FKs
def crawl(self, pages, depth=2):
    # iterate for each depth
    for i in range(depth):
        # use a set to prevent repeats
        newpages = set()
        # for each page in pages list
        for page in pages:
            c = None
            try:
                c = urllib2.urlopen(page)
            except:
                print("Could not open {}".format(page))
            if not c:
                continue
            # after retrieving the html
            soup = BeautifulSoup(c.read())
            # index page (as in add all the words into the words table)
            self.addtoindex(page, soup)
            # iterate through all the links in the page
            links = soup('a')
            for link in links:
                if 'href' in dict(link.attrs):
                    url = urijoin(page, link['href'])
                    # check for quotes
                    if url.find("'") != -1:
                        continue
                    # remove location fragments
                    url = url.split('#')[0]
                    # is the result a valid url
                    if url[0:4] == 'http' and not self.isindexed(url):
                        newpages.add(url)
                    # create a link between the two pages
                    linkText = self.gettextonly(link)
                    self.addlinkref(page, url, linkText)
            # store the db
            self.dbcommit()
        # recurse
        pages = newpages
def custom_patterns(parser, results):
    a_tags = []
    elements = cxpath_href(parser.doc)
    for element in elements:
        href = element.get('href')

        # skip links to invalid hrefs
        if not href:
            continue
        if href.startswith('javascript:'):
            continue

        # canonicalize the href
        href = uritools.urijoin(parser.url, href)
        href = uritools.uridefrag(href).uri
        href = clean_url(href)
        try:
            href = url_normalize.url_normalize(href)
        except Exception as e:
            logging.debug('failed to normalize url ' + href)

        a_tag = {'href': href}

        # extract the rel
        if element.get('rel') is not None:
            rel = element.get('rel').strip()
            if len(rel) > 0:
                a_tag['rel'] = rel

        # extract the text_content
        if element.text_content() is not None:
            text_content = element.text_content().strip()
            if len(text_content) > 0:
                a_tag['text_content'] = text_content

        # add to list
        a_tags.append(a_tag)

    # remove duplicate tags
    a_tags.sort(key=lambda x: x['href'])
    a_tags_dedup = []
    prev_href = None
    for a_tag in a_tags:
        if a_tag['href'] != prev_href:
            prev_href = a_tag['href']
            a_tags_dedup.append(a_tag)

    results.append({'value': a_tags_dedup})
def find_linked_extras(posts):
    extra_page_urls = []
    for post in posts:
        for body_soup in post['body_soups']:
            for element in body_soup.select('[href]'):
                full_href = uritools.urijoin(
                    post['final_url'], element['href'])
                defragged_href = uritools.uridefrag(full_href).uri
                if not url_is_included(defragged_href):
                    href_parts = uritools.urisplit(full_href)
                    base_url_parts = uritools.urisplit(redirects[base_url])
                    # Never try to include linked pages from other domains
                    if href_parts.host == base_url_parts.host:
                        if defragged_href not in extra_page_urls:
                            # TODO: defragged, or full? Uniqueness or is the fragment important?
                            extra_page_urls.append(defragged_href)
    return extra_page_urls
def _traverse_dict(schemaURI, j, jp="", fragment=None):
    # Pre-processing
    newPartialSchemaURI = j.get('$id')
    if newPartialSchemaURI:
        # Computing the absolute schema URI
        if uritools.isabsuri(schemaURI):
            newSchemaURI, uriFragment = uritools.uridefrag(
                uritools.urijoin(schemaURI, newPartialSchemaURI))
        else:
            newSchemaURI, uriFragment = uritools.uridefrag(newPartialSchemaURI)
    else:
        newSchemaURI = schemaURI

    # Are we jumping to a different place?
    if newSchemaURI == schemaURI:
        theId = id(j)
        theIdStr = str(theId)

        # Does the dictionary contain a '$ref'?
        isRef = REF_FEATURE in j

        for k, v in j.items():
            # Following JSON reference standards, we have to
            # ignore other keys when there is a $ref one
            # https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03#section-3
            if isRef and (k != REF_FEATURE):
                continue

            elemId = theIdStr + ':' + k

            elemPath = jp + '/' + k
            jp2val[elemPath] = elemId

            # Is the key among the "special ones"?
            if k in keySet:
                # Saving the correspondence from Python address
                # to unique id of the feature
                id2ElemId.setdefault(theId, {})[k] = [elemId]
                keyRefs.setdefault(k, []).append(
                    FeatureLoc(schemaURI=schemaURI, fragment=fragment,
                               path=elemPath, context=j, id=elemId))

            if isinstance(v, dict):
                # Fragment must not be propagated to children
                _traverse_dict(schemaURI, v, jp=elemPath)
            elif isinstance(v, list):
                _traverse_list(schemaURI, v, jp=elemPath)
    else:
        traverseJSONSchema(j, schemaURI=newSchemaURI, fragment=uriFragment,
                           keys=keys, refSchemaListSet=refSchemaListSet)
def _build_abs_schema_uri(rel_path: str, cb_schemas_base_url: str) -> str:
    """
    Form an absolute URI from a relative path.

    It takes the :code:`common.cb_schemas_base_url` property provided by the build
    configuration file.

    :param str rel_path: A path relative to :code:`build_params.root_build_dir`.
    :param str cb_schemas_base_url: The base URL pointing to the directory where the
      Cookbase Schemas are to be located.

    :return: An absolute URI intended to unambiguously refer to the given path.
    :rtype: str
    """
    base_url_splits = uritools.urisplit(cb_schemas_base_url)
    abs_path = base_url_splits.path or "/"
    return uritools.urijoin(cb_schemas_base_url, os.path.join(abs_path, rel_path))
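# Hedged example (assumed values, not taken from any real build configuration):
# if the base URL were "https://schemas.example.org/cookbase/" and rel_path were
# "defs/ingredient.json", the function above should yield something like:
#
#   _build_abs_schema_uri("defs/ingredient.json",
#                         "https://schemas.example.org/cookbase/")
#   # os.path.join("/cookbase/", "defs/ingredient.json") -> "/cookbase/defs/ingredient.json"
#   # urijoin(base, "/cookbase/defs/ingredient.json")
#   #   -> "https://schemas.example.org/cookbase/defs/ingredient.json"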
def meta_tags(url, timeout=15, html=None):
    if not html:
        html = get(url, timeout=timeout).text
    d = pq(html)
    favicon = d('link[rel="shortcut icon"]').attr("href")
    if not favicon:
        favicon_url = urijoin(url, "/favicon.ico")
        r = requests.head(favicon_url)
        if r.status_code == 200:
            favicon = favicon_url
    return {
        "title": d('meta[name="title"]').attr("content") or d("title").text(),
        "description": d('meta[name="description"]').attr("content"),
        "image": d('meta[name="image"]').attr("content"),
        "favicon": favicon,
        "url": d('meta[name="canonical"]').attr("content")
        or d('meta[name="url"]').attr("content"),
        "keywords": d('meta[name="keywords"]').attr("content"),
    }
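# Hedged sketch (hypothetical page URL, not from the original module): the favicon
# fallback above relies on urijoin resolving a root-relative path against the page
# URL, e.g.
#
#   urijoin("https://blog.example/posts/2021/hello", "/favicon.ico")
#   # -> "https://blog.example/favicon.ico"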
def _delete_test_dag_ids(self, remote_uri: str, test_dag_ids: List[str]) -> Dict[str, str]:
    """
    Calls an endpoint on Airflow where the etsy-test-api service is active.
    This call will delete test_dag_ids from the airflow DB.
    """
    delete_uri = uritools.urijoin(remote_uri, '/etsy-test-api/delete')
    response = requests.post(delete_uri,
                             data=json.dumps({'test_dags': test_dag_ids}),
                             headers={'Content-Type': 'application/json'},
                             timeout=600)
    results = {}
    if response.status_code == 200:
        try:
            results = response.json()
        except JSONDecodeError as e:
            logging.info(
                'Received JSONDecodeError error on call to {}. Full error: {}'
                .format(delete_uri, e))
    return results
def validate(self, validator, fp_def, value, schema):
    if fp_def and isinstance(fp_def, str):
        fp_loc_id = id(schema)

        # Getting the absolute schema id and the route
        if uritools.isabsuri(self.schemaURI):
            abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(
                uritools.urijoin(self.schemaURI, fp_def))
        else:
            abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(fp_def)

        fpDef = self.FPWorld.setdefault(abs_ref_schema_id, {}).get(fp_loc_id)

        # And getting the foreign property definition
        if fpDef is None:
            fpDef = FPDef(schemaURI=self.schemaURI,
                          refSchemaURI=abs_ref_schema_id,
                          path='(unknown {})'.format(fp_loc_id),
                          refPath=rel_json_pointer,
                          values=list())
            self.FPWorld[abs_ref_schema_id][fp_loc_id] = fpDef

        obtainedValues = [(value, )]

        isAtomicValue = (
            len(obtainedValues) == 1
            and len(obtainedValues[0]) == 1
            and isinstance(obtainedValues[0][0], ALLOWED_ATOMIC_VALUE_TYPES)
        )

        if isAtomicValue:
            theValues = [obtainedValues[0][0]]
        else:
            theValues = UniqueKey.GenKeyStrings(obtainedValues)

        fpVals = fpDef.values

        # Second pass will do the validation
        for theValue in theValues:
            fpVals.append(FPVal(where=self.currentJSONFile, value=theValue))
def bootstrap(self, refSchemaTuple=tuple()):
    (id2ElemId, keyRefs, refSchemaCache) = refSchemaTuple

    keyList = keyRefs[self.triggerAttribute]
    errors = []
    # Saving the unique locations
    # based on information from FeatureLoc elems
    for loc in keyList:
        fp_def = loc.context[self.triggerAttribute]
        fp_loc_id = id(loc.context)

        # Getting the absolute schema id and the route
        if uritools.isabsuri(self.schemaURI):
            abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(
                uritools.urijoin(self.schemaURI, fp_def))
        else:
            abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(fp_def)

        if abs_ref_schema_id not in refSchemaCache:
            errors.append({
                'reason': 'fp_no_schema',
                'description': "No schema with {0} id, required by {1} ({2})".format(
                    abs_ref_schema_id, self.jsonSchemaSource, self.schemaURI)
            })

        fpDefH = self.FPWorld.setdefault(abs_ref_schema_id, {})

        # This control is here for the case where the same primary key
        # is referenced from multiple places
        fpDefH[fp_loc_id] = FPDef(schemaURI=self.schemaURI,
                                  refSchemaURI=abs_ref_schema_id,
                                  path=loc.path,
                                  refPath=rel_json_pointer,
                                  values=list())

    return errors
def validate(self, validator, fk_defs, value, schema):
    if fk_defs and isinstance(fk_defs, (list, tuple)):
        fk_defs_gid = str(id(schema))
        for fk_loc_i, p_FK_decl in enumerate(fk_defs):
            fk_loc_id = fk_defs_gid + '_' + str(fk_loc_i)
            ref_schema_id = p_FK_decl['schema_id']
            if uritools.isabsuri(self.schemaURI):
                abs_ref_schema_id = uritools.urijoin(self.schemaURI, ref_schema_id)
            else:
                abs_ref_schema_id = ref_schema_id

            fk_members = p_FK_decl.get('members', [])
            if isinstance(fk_members, list):
                obtainedValues = PrimaryKey.GetKeyValues(value, fk_members)
            else:
                obtainedValues = [(value,)]

            isAtomicValue = (
                len(obtainedValues) == 1
                and len(obtainedValues[0]) == 1
                and isinstance(obtainedValues[0][0], ALLOWED_ATOMIC_VALUE_TYPES)
            )

            if isAtomicValue:
                theValues = [obtainedValues[0][0]]
            else:
                theValues = PrimaryKey.GenKeyStrings(obtainedValues)

            # Group the values to be checked
            # fk_id = id(p_FK_decl)  # id(schema)
            fk_id = abs_ref_schema_id

            # The common dictionary for this declaration where all the FK values are kept
            fkDef = self.FKWorld.setdefault(fk_id, {}).setdefault(
                fk_loc_id,
                FKDef(
                    fkLoc=FKLoc(
                        schemaURI=self.schemaURI,
                        refSchemaURI=abs_ref_schema_id,
                        path='(unknown {})'.format(fk_loc_id),
                        values=list()),
                    members=fk_members))

            fkLoc = fkDef.fkLoc

            fkVals = fkLoc.values

            # Second pass will do the validation
            for theValue in theValues:
                fkVals.append(FKVal(where=self.currentJSONFile, value=theValue))
def parse_lecture(lect_link, name):
    lect_html = requests.get(urijoin(base, lect_link.get("href"))).text
    lect = BeautifulSoup(lect_html, features="html.parser")
    lect_times = (lect.find("div", class_="tudbox")
                  .find(lambda tag: tag.name == "td" and "Zeit/Ort:" in tag.text)
                  .find_next_sibling("td").text.strip())
    dates = [
        dict(
            name=lect_link.text,
            vert_name=name,
            week=date["week"],
            time=date["time"],
            day=date["day"],
            type="lect",
        ) for date in parse_dates(lect_times)
    ]
    tuts = None
    tuts_row = lect.find("div", class_="tudbox").find(
        lambda tag: tag.name == "td" and "Übungen:" in tag.text)
    if tuts_row:
        dates += [
            dict(
                name=lect_link.text,
                vert_name=name,
                date=date,
                week=date["week"],
                time=date["time"],
                day=date["day"],
                type="tut",
            ) for date in parse_dates(
                list(tuts_row.find_next_sibling("td").findAll("td"))[-1].text)
        ]
    return dates
def __init__(
    self,
    mongodb_url: str,
    db_type: str = DBTypes.MONGODB,
    db_name: str = "cookbase",
):
    """Constructor method."""
    if db_type == self.DBTypes.MONGODB:
        self._default_db_id: str = f"{db_type}:{db_name}"
        try:
            client = pymongo.MongoClient(uritools.urijoin(mongodb_url, db_name))
            client.admin.command("ismaster")
        except pymongo.errors.PyMongoError:
            import sys

            raise DBClientConnectionError(self._default_db_id).with_traceback(
                sys.exc_info()[2]
            )
        self._default_db: Any = client[db_name]
        self._connections: Dict[str, Any] = {self._default_db_id: self._default_db}
    else:
        raise InvalidDBTypeError(db_type)
def _get_test_dag_ids(self, remote_uri: str, ttl_days: int = 7) -> List[str]:
    """
    Calls an endpoint on Airflow where the etsy-test-api service is active.
    If the service does not exist (e.g., on production airflows), this call will
    return an empty list.
    """
    list_uri = uritools.urijoin(
        remote_uri, '/etsy-test-api/list_test_dags?ttl_days={}'.format(ttl_days))
    response = requests.get(list_uri, timeout=10)
    test_dag_ids = []
    if response.status_code == 200:
        try:
            payload = response.json()
            test_dag_ids = payload.get('test_dags') if 'test_dags' in payload else []
        except JSONDecodeError as _e:
            logging.info('Received JSONDecodeError on call to {}.'.format(list_uri))
            logging.info('Note: This is okay on Production airflow instances.')
    return test_dag_ids
def geturi(self, filename):
    return uritools.urijoin(self._base_uri, filename)
def geturl(self, identifier, filename=None):
    if filename:
        path = identifier + '/' + uritools.uriencode(filename)
    else:
        path = identifier + '/'
    return uritools.urijoin(self.base_url, '/download/' + path)
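# Hedged usage sketch (base_url is assumed, not taken from the original client):
# with base_url = "https://archive.org/", a call such as
#
#   client.geturl("some-item", "My File.txt")
#
# should percent-encode the filename via uriencode and resolve the absolute
# "/download/..." path against the base, giving something like
# "https://archive.org/download/some-item/My%20File.txt".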
def to_canonical_uri(current_base_uri, uri):
    return urijoin(current_base_uri, uri)
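# Hedged demonstration (not part of the original module): to_canonical_uri defers
# to RFC 3986 reference resolution, so a relative URI is resolved against the
# current base while an absolute URI is returned unchanged.
if __name__ == "__main__":
    base = "https://example.com/schemas/root.json"
    print(to_canonical_uri(base, "child.json"))
    # expected: https://example.com/schemas/child.json
    print(to_canonical_uri(base, "https://other.example/schema.json"))
    # expected: https://other.example/schema.json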