def resolve(self, ident):
    """Return the local path and format of the source image for ``ident``.

    The identifier is URL-unquoted and joined onto ``self.cache_root``. If
    that file already exists it is used as-is; otherwise the object named
    ``self.prefix + ident`` is fetched from the ``self.s3bucket`` bucket
    and written to that path first.

    Returns:
        A ``(local_fp, format)`` tuple. The format is currently always
        ``'jp2'`` (see FIXME below).

    Raises:
        ResolverException: with status 404 when S3 answers with an
            ``S3ResponseError``.
    """
    ident = unquote(ident)
    local_fp = join(self.cache_root, ident)
    logger.debug('local_fp: %s', local_fp)

    src_format = 'jp2'  # FIXME: hard-coded, not derived from the image

    if exists(local_fp):
        logger.debug('src image from local disk: %s', local_fp)
        return (local_fp, src_format)

    # Not cached yet: get image from S3.
    bucketname = self.s3bucket
    keyname = '{0}{1}'.format(self.prefix, ident)
    logger.debug('Getting img from AWS S3. bucketname, keyname: %s, %s',
                 bucketname, keyname)

    s3 = boto.connect_s3()
    bucket = s3.get_bucket(bucketname, validate=False)
    key = bucket.get_key(keyname, validate=False)
    try:
        key.get_contents_to_filename(local_fp)
    except boto.exception.S3ResponseError:
        message = 'Source image not found for identifier: %s.' % (ident,)
        logger.warning(message)
        raise ResolverException(404, message)

    logger.debug('src format %s', src_format)
    return (local_fp, src_format)
def resolve(self, app, ident, base_uri):
    """Build an ``ImageInfo`` for ``ident``, downloading from S3 if needed.

    The identifier is URL-unquoted and joined onto ``self.cache_root``. If
    that file does not exist yet, the object ``self.prefix + ident`` (with
    leading/trailing slashes stripped) is downloaded from the
    ``self.s3bucket`` bucket to that path.

    Args:
        app: passed through to ``ImageInfo``.
        ident: URL-quoted image identifier.
        base_uri: accepted for interface compatibility; not used here.

    Returns:
        ``ImageInfo`` pointing at the local file, with format ``'jp2'``
        and empty auth rules.

    Raises:
        ResolverException: with status 404 when the download raises a
            botocore ``ClientError``.
    """
    ident = unquote(ident)
    # NOTE(review): ident comes from the request and is joined unchecked;
    # confirm upstream validation rejects '..' and absolute paths.
    local_fp = join(self.cache_root, ident)
    logger.debug('local_fp: %s', local_fp)

    if exists(local_fp):
        format_ = 'jp2'  # FIXME
        logger.debug('src image from local disk: %s', local_fp)
    else:
        # get image from S3
        bucketname = self.s3bucket
        keyname = '{0}{1}'.format(self.prefix, ident).strip("/")
        logger.debug(
            'Getting img from AWS S3. bucketname, keyname: %s, %s',
            bucketname, keyname)

        s3 = boto3.client('s3')
        try:
            s3.download_file(bucketname, keyname, local_fp)
        except botocore.exceptions.ClientError as e:
            message = 'Source image not found for identifier: %s.' % (
                ident, )
            # The exception is a log argument, not the format string;
            # passing it first made the logging call itself fail.
            logger.warning('%s (%s)', message, e)
            raise ResolverException(404, message)

        format_ = 'jp2'  # FIXME
        # Log the local variable, not the ``format`` builtin.
        logger.debug('src format %s', format_)

    return ImageInfo(app=app, src_img_fp=local_fp, src_format=format_,
                     auth_rules={})
def __init__(self, config):
    """Set up the S3 resolver from its configuration.

    Reads ``default_format``, ``ident_regex`` and the mandatory
    ``cache_root`` from ``self.config``, records an optional
    ``bucket_map`` from ``config`` and creates a dedicated boto3 session
    whose S3 resource is stored on ``self.s3``.

    Raises:
        ResolverException: if ``cache_root`` is not configured.
    """
    super(S3Resolver, self).__init__(config)

    settings = self.config
    self.default_format = settings.get("default_format")
    self._ident_regex_checker = IdentRegexChecker(
        ident_regex=settings.get("ident_regex"))
    self._cache_namer = CacheNamer()

    # cache_root is mandatory: without it there is nowhere to download to.
    if "cache_root" not in settings:
        message = ("Server Side Error: Configuration incomplete and "
                   "cannot resolve. Missing setting for cache_root.")
        logger.error(message)
        raise ResolverException(message)
    self.cache_root = settings["cache_root"]

    self.has_bucket_map = "bucket_map" in config
    if self.has_bucket_map:
        self.bucket_map = config["bucket_map"]
        logger.debug("s3 bucket_map: {}".format(self.bucket_map))

    # boto3: if not in us-east-1, set envvar AWS_DEFAULT_REGION to avoid
    # extra requests when downloading from s3.
    # A session per resolver is used for thread safety, see:
    # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/resources.html#multithreading-multiprocessing
    self.s3 = boto3.session.Session().resource("s3")

    logger.info("loaded s3 resolver with config: {}".format(config))
def raise_404_for_ident(self, ident):
    """Log that the source image for ``ident`` is missing, then raise.

    The log line includes the path from ``self.source_file_path(ident)``;
    the exception message names only the identifier.

    Raises:
        ResolverException: always, with status 404.
    """
    source_fp = self.source_file_path(ident)
    public_message = 'Source image not found for identifier: %s.' % (
        ident, )
    log_message = 'Source image not found at %s for identifier: %s.' % (
        source_fp, ident)
    # Logger.warn is a deprecated alias of Logger.warning.
    logger.warning(log_message)
    raise ResolverException(404, public_message)
def format_from_ident(self, ident):
    """Derive the image format from the identifier's file extension.

    The text after the last ``.`` is used when it is shorter than five
    characters. It is lower-cased and translated through
    ``constants.EXTENSION_MAP``; an extension missing from the map is
    returned unchanged.

    Raises:
        ResolverException: if ``ident`` has no ``.`` or its extension is
            five or more characters long.
    """
    _, dot, suffix = ident.rpartition('.')
    if dot and len(suffix) < 5:
        suffix = suffix.lower()
        return constants.EXTENSION_MAP.get(suffix, suffix)
    raise ResolverException(
        "Format could not be determined for %r." % ident)
def copy_to_cache(self, ident):
    """Download the source image for ``ident`` into its cache directory.

    The image is streamed to a temporary file inside the cache directory
    and then renamed to ``loris_cache.<extension>``. Afterwards a rules
    file with extension ``self.auth_rules_ext`` is fetched, best effort,
    from next to the image URL.

    Returns:
        Path of the cached image file.

    Raises:
        ResolverException: with status 404 if the image request does not
            succeed.
    """
    ident = unquote(ident)
    cache_dir = self.cache_dir_path(ident)
    mkdir_p(cache_dir)

    # get source image and write to temporary file
    (source_url, options) = self._web_request_url(ident)
    with closing(requests.get(source_url, stream=True, **options)) as response:
        if not response.ok:
            public_message = 'Source image not found for identifier: %s. Status code returned: %s' % (
                ident, response.status_code)
            log_message = 'Source image not found at %s for identifier: %s. Status code returned: %s' % (
                source_url, ident, response.status_code)
            logger.warning(log_message)
            raise ResolverException(404, public_message)

        extension = self.cache_file_extension(ident, response)
        local_fp = join(cache_dir, "loris_cache." + extension)

        # Leaving the ``with`` closes (and therefore flushes) the file.
        with tempfile.NamedTemporaryFile(dir=cache_dir, delete=False) as tmp_file:
            for chunk in response.iter_content(2048):
                tmp_file.write(chunk)

    # now rename the tmp file to the desired file name if it still
    # doesn't exist (another process could have created it)
    if exists(local_fp):
        logger.info('another process downloaded src image %s', local_fp)
        remove(tmp_file.name)
    else:
        rename(tmp_file.name, local_fp)
        logger.info("Copied %s to %s", source_url, local_fp)

    # Check for rules file associated with image file
    # These files are < 2k in size, so fetch in one go.
    # Assumes that the rules will be next to the image
    # cache_dir is image specific, so this is easy
    bits = split(source_url)
    # NOTE(review): rsplit('.') without maxsplit keeps only the text
    # before the FIRST dot of the file name -- confirm this is intended.
    fn = bits[1].rsplit('.')[0] + "." + self.auth_rules_ext
    rules_url = bits[0] + '/' + fn
    try:
        resp = requests.get(rules_url)
        if resp.status_code == 200:
            local_rules_fp = join(cache_dir, "loris_cache." + self.auth_rules_ext)
            if not exists(local_rules_fp):
                # Previously this wrote the undefined name ``r``; the
                # NameError was swallowed and an empty file was left.
                with open(local_rules_fp, 'w') as fh:
                    fh.write(resp.text)
    except Exception as e:
        # Rules are optional, so a failure here must not fail the image
        # request -- but it should not be invisible either.
        logger.warning('Ignoring rules file %s for identifier %s: %s',
                       rules_url, ident, e)

    return local_fp
def raise_404_for_ident(self, ident):
    """Log that the source image for ``ident`` is missing, then raise.

    The log line includes the path from ``self.source_file_path(ident)``;
    the exception message names only the identifier.

    Raises:
        ResolverException: always.
    """
    source_fp = self.source_file_path(ident)
    # Logger.warn is a deprecated alias of Logger.warning.
    logger.warning(
        "Source image not found at %s for identifier: %s.",
        source_fp, ident
    )
    raise ResolverException(
        "Source image not found for identifier: %s." % ident
    )
def _web_request_url(self, ident):
    """Expand a ``prefix:parts`` identifier into a URL and request options.

    ``prefix`` selects an entry of ``self.templates``; its ``url`` value is
    a ``%``-format template. When ``self.config`` has a ``delimiter``, the
    remainder of the identifier is split on it and the pieces fill the
    template; otherwise the remainder is substituted as a single value.

    Returns:
        ``(url, options)`` where ``options`` starts from
        ``self.request_options()`` and gains ``cert``, ``auth`` and
        ``verify`` entries from the template's own settings when present.

    Raises:
        ResolverException: if the identifier has no ``:``, the prefix has
            no template with a ``url``, or the parts do not fit the
            template.
    """
    # only split identifiers that look like template ids;
    # ignore other requests (e.g. favicon)
    if ':' not in ident:
        logger.warning('Bad URL request for identifier: %r.', ident)
        raise ResolverException(
            "Bad URL request made for identifier: %r." % ident)

    prefix, ident_parts = ident.split(':', 1)

    try:
        conf = self.templates[prefix]
        url_template = conf['url']
    except KeyError:
        logger.warning('No template found for identifier: %r.', ident)
        raise ResolverException(
            "Bad URL request made for identifier: %r." % ident)

    try:
        if 'delimiter' in self.config:
            template_args = tuple(
                ident_parts.split(self.config['delimiter']))
        else:
            template_args = ident_parts
        url = url_template % template_args
    except TypeError as e:
        # Raised when the number of parts does not match the number of
        # placeholders in the template, e.g. '%s' % (1, 2). Both the
        # delimiter and no-delimiter paths are covered here.
        logger.warning(
            'TypeError raised when processing identifier: %r (%r).',
            ident, e)
        raise ResolverException(
            "Bad URL request made for identifier: %r." % ident)

    # Get the generic options
    options = self.request_options()
    # Then add any template-specific ones
    if 'cert' in conf and 'key' in conf:
        options['cert'] = (conf['cert'], conf['key'])
    if 'user' in conf and 'pw' in conf:
        options['auth'] = (conf['user'], conf['pw'])
    if 'ssl_check' in conf:
        options['verify'] = conf['ssl_check']
    return (url, options)
def _web_request_url(self, ident):
    """Return the URL to fetch for ``ident`` and the request options.

    An identifier that is itself an http(s) URL is used directly when
    ``self.uri_resolvable`` is set; otherwise the URL is
    ``self.source_prefix + ident + self.source_suffix``.

    Returns:
        ``(url, self.request_options())``.

    Raises:
        ResolverException: if the resulting URL is not http(s).
    """
    if ident.startswith(('http://', 'https://')) and self.uri_resolvable:
        url = ident
    else:
        url = self.source_prefix + ident + self.source_suffix

    if not url.startswith(('http://', 'https://')):
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning(
            'Bad URL request at %s for identifier: %s.', url, ident)
        raise ResolverException(
            "Bad URL request made for identifier: %r." % ident)

    return (url, self.request_options())
def __init__(self, config):
    """Set up the simple HTTP resolver from its configuration.

    Copies the optional settings (with their defaults) from
    ``self.config`` onto the instance, builds the identifier regex
    checker and cache namer, and validates the mandatory settings.

    Raises:
        ResolverException: if ``cache_root`` is missing, or if neither
            ``uri_resolvable`` nor a non-empty ``source_prefix`` is set.
    """
    super(SimpleHTTPResolver, self).__init__(config)
    settings = self.config

    # (attribute / config key, default when the key is absent)
    optional_settings = (
        ('source_prefix', ''),
        ('source_suffix', ''),
        ('default_format', None),
        ('head_resolvable', False),
        ('uri_resolvable', False),
        ('user', None),
        ('pw', None),
        ('cert', None),
        ('key', None),
        ('ssl_check', True),
    )
    for option, fallback in optional_settings:
        setattr(self, option, settings.get(option, fallback))

    self._ident_regex_checker = IdentRegexChecker(
        ident_regex=settings.get('ident_regex')
    )
    self._cache_namer = CacheNamer()

    if 'cache_root' not in settings:
        message = ('Server Side Error: Configuration incomplete and '
                   'cannot resolve. Missing setting for cache_root.')
        logger.error(message)
        raise ResolverException(message)
    self.cache_root = settings['cache_root']

    # Without a prefix, identifiers can only be resolved when they are
    # full URLs themselves.
    if self.source_prefix == '' and not self.uri_resolvable:
        message = ('Server Side Error: Configuration incomplete and '
                   'cannot resolve. Must either set uri_resolvable'
                   ' or source_prefix settings.')
        logger.error(message)
        raise ResolverException(message)
def _web_request_url(self, ident):
    """Return the URL to fetch for ``ident`` and the request options.

    An identifier that is itself an http(s) URL is used directly when
    ``self.uri_resolvable`` is set; otherwise the URL is
    ``self.source_prefix + ident + self.source_suffix``.

    Returns:
        ``(url, self.request_options())``.

    Raises:
        ResolverException: with status 404 if the resulting URL is not
            http(s).
    """
    schemes = ('http://', 'https://')

    if ident.startswith(schemes) and self.uri_resolvable:
        url = ident
    else:
        url = self.source_prefix + ident + self.source_suffix

    if not url.startswith(schemes):
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning(
            'Bad URL request at %s for identifier: %s.', url, ident)
        public_message = 'Bad URL request made for identifier: %s.' % (
            ident, )
        raise ResolverException(404, public_message)

    return (url, self.request_options())
def s3bucket_from_ident(self, ident):
    """Parse ``ident`` into a ``(bucketname, keyname)`` tuple.

    ``ident`` must look like ``<bucket>/<key>``. When a bucket map is
    configured and ``<bucket>`` is one of its entries, the entry's
    ``bucket`` value is used as the real bucket name and its optional
    ``key_prefix`` is prepended to the key. Otherwise both parts are
    returned as they came in.

    Raises:
        ResolverException: if ``ident`` contains no ``/``.
    """
    # S3 keys always use "/" regardless of the host OS, so join with
    # posixpath rather than os.path.
    import posixpath

    key_parts = ident.split("/", 1)
    if len(key_parts) != 2:
        raise ResolverException(
            "Invalid identifier. Expected bucket/ident; got {}".format(
                key_parts))
    (bucket, partial_key) = key_parts

    # check if bucketname actually means something different
    if self.has_bucket_map and bucket in self.bucket_map:
        mapping = self.bucket_map[bucket]
        bucketname = mapping["bucket"]
        if "key_prefix" in mapping:
            # NOTE(review): a partial_key starting with "/" makes join()
            # drop key_prefix entirely -- confirm idents are validated
            # upstream if the prefix is meant to confine access.
            keyname = posixpath.join(mapping["key_prefix"], partial_key)
        else:
            keyname = partial_key
        return (bucketname, keyname)

    # what came in ident is the actual bucketname
    return (bucket, partial_key)
def copy_to_cache(self, ident):
    """Download the source image for ``ident`` into its cache directory.

    The image is streamed to a temporary file inside the cache directory
    and then moved to ``loris_cache.<extension>``. Afterwards a rules
    file with extension ``self.auth_rules_ext`` is fetched, best effort,
    from next to the image URL.

    Returns:
        Path of the cached image file.

    Raises:
        ResolverException: if the image request does not succeed.
    """
    ident = unquote(ident)

    # get source image and write to temporary file
    (source_url, options) = self._web_request_url(ident)
    assert source_url is not None

    cache_dir = self.cache_dir_path(ident)
    mkdir_p(cache_dir)

    with closing(requests.get(source_url, stream=True, **options)) as response:
        if not response.ok:
            logger.warning(
                "Source image not found at %s for identifier: %s. "
                "Status code returned: %s.",
                source_url, ident, response.status_code)
            raise ResolverException(
                "Source image not found for identifier: %s. "
                "Status code returned: %s." % (ident, response.status_code))

        extension = self.cache_file_extension(ident, response)
        local_fp = join(cache_dir, "loris_cache." + extension)

        with tempfile.NamedTemporaryFile(dir=cache_dir, delete=False) as tmp_file:
            for chunk in response.iter_content(2048):
                tmp_file.write(chunk)

    # Now rename the temp file to the desired file name if it still
    # doesn't exist (another process could have created it).
    #
    # Note: This is purely an optimisation; if the file springs into
    # existence between the existence check and the copy, it will be
    # overridden.
    if exists(local_fp):
        logger.info('Another process downloaded src image %s', local_fp)
        remove(tmp_file.name)
    else:
        safe_rename(tmp_file.name, local_fp)
        logger.info("Copied %s to %s", source_url, local_fp)

    # Check for rules file associated with image file
    # These files are < 2k in size, so fetch in one go.
    # Assumes that the rules will be next to the image
    # cache_dir is image specific, so this is easy
    bits = split(source_url)
    # NOTE(review): rsplit('.') without maxsplit keeps only the text
    # before the FIRST dot of the file name -- confirm this is intended.
    fn = bits[1].rsplit('.')[0] + "." + self.auth_rules_ext
    rules_url = bits[0] + '/' + fn
    try:
        resp = requests.get(rules_url)
        if resp.status_code == 200:
            local_rules_fp = join(cache_dir, "loris_cache." + self.auth_rules_ext)
            if not exists(local_rules_fp):
                with open(local_rules_fp, 'w') as fh:
                    fh.write(resp.text)
    except Exception as e:
        # Rules are optional, so a failure here (e.g. no connection) must
        # not fail the image request. Catch Exception rather than
        # everything, so KeyboardInterrupt/SystemExit still propagate.
        logger.warning('Ignoring rules file %s for identifier %s: %s',
                       rules_url, ident, e)

    return local_fp
def raise_404_for_ident(self, ident):
    """Raise a ResolverException saying no image exists for ``ident``.

    Raises:
        ResolverException: always.
    """
    message = "Image not found for identifier: %r." % ident
    raise ResolverException(message)
def raise_404_for_ident(self, ident):
    """Log that the source image for ``ident`` is missing, then raise.

    The same message is used for the log line and the exception.

    Raises:
        ResolverException: always.
    """
    message = 'Source image not found for identifier: %s.' % (ident, )
    # Logger.warn is a deprecated alias of Logger.warning.
    logger.warning(message)
    raise ResolverException(message)
def raise_404_for_ident(self, ident):
    """Log that the S3 key for ``ident`` is missing, then raise.

    The key name comes from ``self.s3_key_name(ident)``. The log line
    also names ``self.s3_bucket``; the exception message names only the
    key.

    Raises:
        ResolverException: always.
    """
    s3_key_name = self.s3_key_name(ident)
    # Logger.warn is a deprecated alias of Logger.warning.
    logger.warning(
        'Key %s not found in bucket: %s.', s3_key_name, self.s3_bucket)
    raise ResolverException('Key %s not found.' % s3_key_name)
def raise_404_for_ident(self, ident):
    """Raise a 404 ResolverException saying no image exists for ``ident``.

    Raises:
        ResolverException: always, with status 404.
    """
    raise ResolverException(
        404, 'Image not found for identifier: %s.' % (ident))
def copy_to_cache(self, ident):
    """Download the source image for ``ident`` from S3 into the cache.

    The bucket and key come from ``self.s3bucket_from_ident``. The object
    is downloaded to a temporary file in the image's cache directory and
    then moved to ``loris_cache.<extension>``. Afterwards a rules file
    with extension ``self.auth_rules_ext`` is fetched, best effort, from
    next to the image key.

    This method always downloads; it does not check whether the image is
    already cached.

    Returns:
        Path of the cached image file.

    Raises:
        ResolverException: if the object's content type cannot be read or
            the object cannot be downloaded.
    """
    ident = unquote(ident)

    # get source image and write to temporary file
    (bucketname, keyname) = self.s3bucket_from_ident(ident)

    try:
        s3obj = self.s3.Object(bucketname, keyname)
        content_type = s3obj.content_type
    except Exception as e:
        msg = "no content_type for s3 object ({}:{}): {}".format(
            bucketname, keyname, e)
        logger.error(msg)
        raise ResolverException(msg)

    extension = self.cache_file_extension(ident, content_type)
    cache_dir = self.cache_dir_path(ident)
    os.makedirs(cache_dir, exist_ok=True)
    local_fp = os.path.join(cache_dir, "loris_cache." + extension)

    tmp_file = tempfile.NamedTemporaryFile(dir=cache_dir, delete=False)
    try:
        with tmp_file:
            self.s3.Bucket(bucketname).download_fileobj(keyname, tmp_file)
    except Exception as e:
        # The temp file is created with delete=False, so a failed
        # download would otherwise leave it in the cache directory.
        if os.path.exists(tmp_file.name):
            os.remove(tmp_file.name)
        msg = "unable to access or save s3 object ({}:{}): {}".format(
            bucketname, keyname, e)
        logger.error(msg)
        raise ResolverException(msg)

    # Now rename the temp file to the desired file name if it still
    # doesn't exist (another process could have created it).
    #
    # Note: This is purely an optimisation; if the file springs into
    # existence between the existence check and the copy, it will be
    # overridden.
    if os.path.exists(local_fp):
        logger.info(
            "Another process downloaded src image {}".format(local_fp))
        os.remove(tmp_file.name)
    else:
        safe_rename(tmp_file.name, local_fp)
        logger.info("Copied {}:{} to {}".format(
            bucketname, keyname, local_fp))

    # Check for rules file associated with image file
    # These files are < 2k in size, so fetch in one go.
    # Assumes that the rules will be next to the image
    # cache_dir is image specific, so this is easy
    bits = os.path.split(keyname)  # (dirname, basename)
    # NOTE(review): rsplit(".") without maxsplit keeps only the text
    # before the FIRST dot of the file name -- confirm this is intended.
    fn = bits[1].rsplit(".")[0] + "." + self.auth_rules_ext
    # A key at the bucket root has an empty dirname; prefixing "/" would
    # produce a different key that never matches.
    if bits[0]:
        rules_keyname = bits[0] + "/" + fn
    else:
        rules_keyname = fn
    local_rules_fp = os.path.join(
        cache_dir, "loris_cache." + self.auth_rules_ext)
    try:
        self.s3.Object(bucketname, rules_keyname).download_file(
            local_rules_fp)
    except Exception as e:
        # Rules are optional (missing object, no connection, ...), so
        # report the failure and carry on.
        msg = "ignoring rules file({}/{}) for ident({}): {}".format(
            bucketname, rules_keyname, ident, e)
        logger.warning(msg)

    return local_fp