def _handleEntry(self, path, docname, standalone=False):
    """
    handle an asset entry

    When an asset is detected in a document, the information about the asset
    is tracked in this manager. The following cases are handled:

    - The path has already been registered (e.g. an asset used twice): the
      existing entry is re-used and no new entry is created.
    - The path is new but its hash matches an already tracked asset: the
      path is registered as an alias of the existing entry (i.e. a
      documentation set has duplicate assets), unless ``standalone`` is set
      to ``True``, in which case a new entry is created.
    - Otherwise: a new entry is created with a unique, sanitized key.

    In all cases, the asset is updated to track that ``docname`` uses it.

    Args:
        path: the absolute path to the asset
        docname: the document name this asset was found in
        standalone (optional): ignore hash mappings (defaults to False)
    """
    asset = self.path2asset.get(path)
    if asset is None:
        # only hash/inspect the file for paths not seen before
        hash_ = ConfluenceUtil.hashAsset(path)
        type_ = guess_mimetype(path, default=DEFAULT_CONTENT_TYPE)

        hash_exists = hash_ in self.hash2asset
        if not hash_exists or standalone:
            # no asset entry and no hash entry (or standalone); new asset
            key = os.path.basename(path)

            # Confluence does not allow attachments with select characters.
            # Filter out the asset name to a compatible key value.
            for rep in INVALID_CHARS:
                key = key.replace(rep, '_')

            # ensure the key is unique by appending an increasing index
            filename, file_ext = os.path.splitext(key)
            idx = 1
            while key in self.keys:
                idx += 1
                key = '{}_{}{}'.format(filename, idx, file_ext)
            self.keys.add(key)

            asset = ConfluenceAsset(key, path, type_, hash_)
            self.assets.append(asset)
            self.path2asset[path] = asset

            # a standalone asset sharing a hash with an existing asset does
            # not replace the existing hash mapping
            if not hash_exists:
                self.hash2asset[hash_] = asset
        else:
            # duplicate asset detected; build an asset alias
            asset = self.hash2asset[hash_]
            self.path2asset[path] = asset

    # note: no `hash2asset[asset.hash] == asset` check is made for an already
    # registered path; a standalone asset is intentionally not the hash2asset
    # entry for its hash, so such a check would fail on its second use

    # track (if not already) that this document uses this asset
    asset.docnames.add(docname)
def guess_mimetypes(self, node: nodes.image) -> List[str]:
    """Return the MIME types available for the image ``node``.

    * A ``'?'`` candidate yields an empty list.
    * A ``'*'`` candidate yields a single-item list holding whatever
      ``guess_mimetype`` reports for the node's URI.
    * Otherwise the candidate keys are returned.

    A real ``list`` is always returned (never a dictionary view), matching
    the declared return type.
    """
    candidates = node['candidates']
    if '?' in candidates:
        return []
    if '*' in candidates:
        return [guess_mimetype(node['uri'])]
    return list(candidates)
def handle(self, node: nodes.image) -> None:
    """Fetch the image referenced by ``node['uri']`` into ``self.imagedir``.

    The local file name and directory are derived from the URI (hashed with
    SHA-1 when longer than ``MAX_FILENAME_LEN``). When a local copy already
    exists, an ``If-Modified-Since`` header is sent. On success the node's
    ``uri`` and ``candidates`` are rewritten to point at the local file and
    the file is registered with the environment. Any failure is reported as
    a warning instead of being raised.
    """
    try:
        uri = node['uri']

        # file name: last path component, without any query string
        basename = os.path.basename(uri).partition('?')[0]
        if not basename or len(basename) > MAX_FILENAME_LEN:
            stem, suffix = os.path.splitext(uri)
            basename = sha1(stem.encode()).hexdigest() + suffix
        basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename)

        # directory name: the URI with scheme/query separators turned
        # into path separators
        dirname = uri.replace('://', '/').translate({ord("?"): "/",
                                                     ord("&"): "/"})
        if len(dirname) > MAX_FILENAME_LEN:
            dirname = sha1(dirname.encode()).hexdigest()

        target_dir = os.path.join(self.imagedir, dirname)
        ensuredir(target_dir)
        path = os.path.join(target_dir, basename)

        # ask only for a newer version when a local copy exists
        headers = {}
        if os.path.exists(path):
            mtime = ceil(os.stat(path).st_mtime)
            headers['If-Modified-Since'] = epoch_to_rfc1123(mtime)

        response = requests.get(uri, headers=headers)
        if response.status_code >= 400:
            logger.warning(__('Could not fetch remote image: %s [%d]') %
                           (uri, response.status_code))
            return

        originals = self.app.env.original_image_uri
        originals[path] = uri

        if response.status_code == 200:
            with open(path, 'wb') as f:
                f.write(response.content)

        # mirror the server's modification time on the local file
        last_modified = response.headers.get('last-modified')
        if last_modified:
            remote_mtime = rfc1123_to_epoch(last_modified)
            os.utime(path, (remote_mtime, remote_mtime))

        mimetype = guess_mimetype(path, default='*')
        if mimetype != '*' and os.path.splitext(basename)[1] == '':
            # append a suffix if URI does not contain suffix
            renamed = os.path.join(target_dir,
                                   basename + get_image_extension(mimetype))
            os.replace(path, renamed)
            originals.pop(path)
            originals[renamed] = uri
            path = renamed

        node['candidates'].pop('?')
        node['candidates'][mimetype] = path
        node['uri'] = path
        self.app.env.images.add_file(self.env.docname, path)
    except Exception as exc:
        logger.warning(__('Could not fetch remote image: %s [%s]') %
                       (node['uri'], exc))
def processDocument(self, doctree, docname, standalone=False):
    """
    process a document for assets

    This method will search the provided document's doctree for supported
    assets which could be published. Asset information is tracked in this
    manager and other helper methods can be used to pull asset information
    when needed.

    Image nodes are processed first, followed by download references.
    Embedded (``data:``) images and any target containing ``://`` are
    ignored, as are nodes for which no asset path can be interpreted.

    Args:
        doctree: the document's tree
        docname: the document's name
        standalone (optional): ignore hash mappings (defaults to False)
    """
    for node in doctree.traverse(nodes.image):
        uri = node['uri']
        if uri.startswith('data:') or '://' in uri:
            continue

        path = self._interpretAssetPath(node)
        if path:
            # the entry handler resolves the asset's hash and mimetype
            self._handleEntry(path, docname, standalone)

    for node in doctree.traverse(addnodes.download_reference):
        if '://' in node['reftarget']:
            continue

        path = self._interpretAssetPath(node)
        if path:
            self._handleEntry(path, docname, standalone)
def guess_mimetypes(self, node):
    # type: (nodes.Node) -> List[unicode]
    """Return the MIME types available for the image ``node``.

    * A ``'?'`` candidate yields an empty list.
    * A ``'*'`` candidate yields a single-item list holding whatever
      ``guess_mimetype`` reports for the node's URI.
    * Otherwise the candidate keys are returned.

    A real ``list`` is always returned (never a dictionary view), matching
    the declared return type.
    """
    candidates = node['candidates']
    if '?' in candidates:
        return []
    if '*' in candidates:
        # imported here, as in the original implementation
        from sphinx.util.images import guess_mimetype
        return [guess_mimetype(node['uri'])]
    return list(candidates)
def test_guess_mimetype():
    """Check filename-based detection and the ``default`` fallback."""
    # each case: (positional args, keyword args, expected result)
    cases = [
        # guess by filename
        (('img.png',), {}, 'image/png'),
        (('img.jpg',), {}, 'image/jpeg'),
        (('img.txt',), {}, None),
        (('img.txt',), {'default': 'text/plain'}, 'text/plain'),
        (('no_extension',), {}, None),
        (('IMG.PNG',), {}, 'image/png'),
        # default parameter is used when no extension
        (('img.png', 'text/plain'), {}, 'image/png'),
        (('no_extension', 'text/plain'), {}, 'text/plain'),
    ]

    for args, kwargs, expected in cases:
        actual = guess_mimetype(*args, **kwargs)
        if expected is None:
            assert actual is None
        else:
            assert actual == expected
def handle(self, node):
    # type: (nodes.Node) -> None
    """Fetch the image referenced by ``node['uri']`` into ``self.imagedir``.

    The local file name and directory are derived from the URI (hashed with
    SHA-1 when longer than ``MAX_FILENAME_LEN``). When a local copy already
    exists, an ``If-Modified-Since`` header is sent. On success the node's
    ``uri`` and ``candidates`` are rewritten to point at the local file and
    the file is registered with the environment. Any failure is reported as
    a warning instead of being raised.
    """
    try:
        remote_uri = node['uri']

        # file name: last path component, without any query string
        basename = os.path.basename(remote_uri).partition('?')[0]
        if not basename or len(basename) > MAX_FILENAME_LEN:
            stem, suffix = os.path.splitext(remote_uri)
            basename = sha1(stem.encode("utf-8")).hexdigest() + suffix

        # directory name: the URI with scheme/query separators turned
        # into path separators
        separators = {ord("?"): u"/", ord("&"): u"/"}
        dirname = remote_uri.replace('://', '/').translate(separators)
        if len(dirname) > MAX_FILENAME_LEN:
            dirname = sha1(dirname.encode('utf-8')).hexdigest()

        cache_dir = os.path.join(self.imagedir, dirname)
        ensuredir(cache_dir)
        path = os.path.join(cache_dir, basename)

        # ask only for a newer version when a local copy exists
        headers = {}
        if os.path.exists(path):
            local_mtime = ceil(os.stat(path).st_mtime)
            headers['If-Modified-Since'] = epoch_to_rfc1123(local_mtime)

        reply = requests.get(remote_uri, headers=headers)
        if reply.status_code >= 400:
            logger.warning(__('Could not fetch remote image: %s [%d]') %
                           (remote_uri, reply.status_code))
            return

        uri_map = self.app.env.original_image_uri
        uri_map[path] = remote_uri

        if reply.status_code == 200:
            with open(path, 'wb') as f:
                f.write(reply.content)

        # mirror the server's modification time on the local file
        last_modified = reply.headers.get('last-modified')
        if last_modified:
            remote_mtime = rfc1123_to_epoch(last_modified)
            os.utime(path, (remote_mtime, remote_mtime))

        mimetype = guess_mimetype(path, default='*')
        if mimetype != '*' and os.path.splitext(basename)[1] == '':
            # append a suffix if URI does not contain suffix
            suffixed = os.path.join(cache_dir,
                                    basename + get_image_extension(mimetype))
            movefile(path, suffixed)
            uri_map.pop(path)
            uri_map[suffixed] = remote_uri
            path = suffixed

        node['candidates'].pop('?')
        node['candidates'][mimetype] = path
        node['uri'] = path
        self.app.env.images.add_file(self.env.docname, path)
    except Exception as exc:
        logger.warning(__('Could not fetch remote image: %s [%s]') %
                       (node['uri'], text_type(exc)))
def collect_candidates(imgpath, candidates):
    """Add one image per not-yet-known MIME type matching ``imgpath``.

    Every file matching the glob pattern is grouped by its guessed MIME
    type; types already present in ``candidates`` are skipped. For each new
    type, the file with the shortest relative path is stored in
    ``candidates``. ``self`` and ``node`` come from the enclosing scope.
    """
    by_mimetype = {}
    for filename in glob(imgpath):
        relpath = relative_path(path.join(self.srcdir, 'dummy'), filename)
        try:
            mimetype = guess_mimetype(filename)
        except (OSError, IOError) as err:
            self.warn_node('image file %s not readable: %s' %
                           (filename, err), node)
            continue
        if mimetype in candidates:
            continue
        by_mimetype.setdefault(mimetype, []).append(relpath)

    for mimetype, relpaths in iteritems(by_mimetype):
        # select by similarity: first of the shortest paths
        candidates[mimetype] = min(relpaths, key=len)
def collect_candidates(self, env, imgpath, candidates, node):
    # type: (BuildEnvironment, unicode, Dict[unicode, unicode], nodes.Node) -> None
    """Add one image per not-yet-known MIME type matching ``imgpath``.

    Every file matching the glob pattern is grouped by its guessed MIME
    type; types already present in ``candidates`` are skipped. For each new
    type, the file with the shortest relative path is stored in
    ``candidates``. Unreadable files are reported as warnings located at
    ``node``.
    """
    by_mimetype = {}  # type: Dict[unicode, List[unicode]]
    for filename in glob(imgpath):
        relpath = relative_path(path.join(env.srcdir, 'dummy'), filename)
        try:
            mimetype = guess_mimetype(filename)
        except (OSError, IOError) as err:
            logger.warning(__('image file %s not readable: %s') %
                           (filename, err),
                           location=node, type='image',
                           subtype='not_readable')
            continue
        if mimetype in candidates:
            continue
        by_mimetype.setdefault(mimetype, []).append(relpath)

    for mimetype, relpaths in iteritems(by_mimetype):
        # select by similarity: first of the shortest paths
        candidates[mimetype] = min(relpaths, key=len)
def collect_candidates(self, env: BuildEnvironment, imgpath: str,
                       candidates: Dict[str, str], node: Node) -> None:
    """Add one image per not-yet-known MIME type matching ``imgpath``.

    Every file matching the glob pattern is grouped by its guessed MIME
    type; when no type can be guessed, ``image/x-<suffix>`` is built from
    the file suffix. Types already present in ``candidates`` are skipped.
    For each new type, the file with the shortest relative path is stored
    in ``candidates``. Unreadable files are reported as warnings located at
    ``node``.
    """
    by_mimetype: Dict[str, List[str]] = {}
    for filename in glob(imgpath):
        relpath = relative_path(path.join(env.srcdir, 'dummy'), filename)
        try:
            mimetype = guess_mimetype(filename)
        except OSError as err:
            logger.warning(__('image file %s not readable: %s') %
                           (filename, err),
                           location=node, type='image',
                           subtype='not_readable')
            continue

        if mimetype is None:
            # unknown type: fall back to the suffix without its leading dot
            mimetype = 'image/x-' + path.splitext(filename)[1][1:]
        if mimetype in candidates:
            continue
        by_mimetype.setdefault(mimetype, []).append(relpath)

    for mimetype, relpaths in by_mimetype.items():
        # select by similarity: first of the shortest paths
        candidates[mimetype] = min(relpaths, key=len)
def handle(self, node):
    # type: (nodes.Node) -> None
    """Fetch the image referenced by ``node['uri']`` into ``self.imagedir``.

    The local path is derived from the URI. When a local copy already
    exists, an ``If-Modified-Since`` header is sent. On success the node's
    ``uri`` and ``candidates`` are rewritten to point at the local file and
    the file is registered with the environment. Download failures are
    reported as warnings instead of being raised.
    """
    remote_uri = node['uri']

    # file name: last path component, without any query string
    basename = os.path.basename(remote_uri).partition('?')[0]

    # directory name: the URI with scheme/query separators turned into
    # path separators
    separators = {ord("?"): u"/", ord("&"): u"/"}
    dirname = remote_uri.replace('://', '/').translate(separators)

    cache_dir = os.path.join(self.imagedir, dirname)
    ensuredir(cache_dir)
    path = os.path.join(cache_dir, basename)

    try:
        # ask only for a newer version when a local copy exists
        headers = {}
        if os.path.exists(path):
            local_mtime = ceil(os.stat(path).st_mtime)
            headers['If-Modified-Since'] = epoch_to_rfc1123(local_mtime)

        reply = requests.get(node['uri'], headers=headers)
        if reply.status_code >= 400:
            logger.warning('Could not fetch remote image: %s [%d]' %
                           (node['uri'], reply.status_code))
            return

        self.app.env.original_image_uri[path] = node['uri']

        if reply.status_code == 200:
            with open(path, 'wb') as f:
                f.write(reply.content)

        # mirror the server's modification time on the local file
        last_modified = reply.headers.get('last-modified')
        if last_modified:
            remote_mtime = rfc1123_to_epoch(last_modified)
            os.utime(path, (remote_mtime, remote_mtime))

        mimetype = guess_mimetype(path, default='*')
        node['candidates'].pop('?')
        node['candidates'][mimetype] = path
        node['uri'] = path
        self.app.env.images.add_file(self.env.docname, path)
    except Exception as exc:
        logger.warning('Could not fetch remote image: %s [%s]' %
                       (node['uri'], text_type(exc)))
def handle(self, node):
    # type: (nodes.Node) -> None
    """Download the remote image of ``node`` below ``self.imagedir``.

    The target path is built from the node's URI. An existing local copy
    causes an ``If-Modified-Since`` header to be sent. After a successful
    request, the node's ``uri`` and ``candidates`` point at the local file
    and the file is registered with the environment. Download failures are
    logged as warnings rather than raised.
    """
    source_uri = node['uri']

    # strip any query string from the last path component
    name = os.path.basename(source_uri).partition('?')[0]

    # turn scheme and query separators into directory separators
    folder = source_uri.replace('://', '/').translate(
        {ord("?"): u"/", ord("&"): u"/"})

    destination = os.path.join(self.imagedir, folder)
    ensuredir(destination)
    path = os.path.join(destination, name)

    try:
        request_headers = {}
        if os.path.exists(path):
            # conditional request based on the local file's age
            request_headers['If-Modified-Since'] = epoch_to_rfc1123(
                ceil(os.stat(path).st_mtime))

        r = requests.get(node['uri'], headers=request_headers)
        if r.status_code < 400:
            self.app.env.original_image_uri[path] = node['uri']

            if r.status_code == 200:
                with open(path, 'wb') as out:
                    out.write(r.content)

            # keep the local timestamp aligned with the server's
            stamp = r.headers.get('last-modified')
            if stamp:
                epoch = rfc1123_to_epoch(stamp)
                os.utime(path, (epoch, epoch))

            mimetype = guess_mimetype(path, default='*')
            node['candidates'].pop('?')
            node['candidates'][mimetype] = path
            node['uri'] = path
            self.app.env.images.add_file(self.env.docname, path)
        else:
            logger.warning('Could not fetch remote image: %s [%d]' %
                           (node['uri'], r.status_code))
    except Exception as exc:
        logger.warning('Could not fetch remote image: %s [%s]' %
                       (node['uri'], text_type(exc)))
def visit_image(self, node):
    """
    prepare an image node and forward it for rendering

    Resolves the attachment key for internal images (processing the image
    on demand if it is not yet tracked), computes the height/width values
    (including scale handling and unit conversion) and forwards the
    resulting options to ``_visit_image``.

    Args:
        node: the image node

    Raises:
        nodes.SkipNode: when the node has no uri or when an internal
            image's asset cannot be found
    """
    if 'uri' not in node or not node['uri']:
        self.verbose('skipping image with no uri')
        raise nodes.SkipNode

    uri = node['uri']
    uri = self.encode(uri)

    dochost = None
    img_key = None
    img_sz = None
    # an image is internal when it is neither remote (scheme://) nor an
    # embedded data uri
    internal_img = uri.find('://') == -1 and not uri.startswith('data:')
    is_svg = uri.startswith('data:image/svg+xml') or \
        guess_mimetype(uri) == 'image/svg+xml'

    if internal_img:
        asset_docname = None
        if 'single' in self.builder.name:
            asset_docname = self.docname

        img_key, dochost, img_path = \
            self.assets.fetch(node, docname=asset_docname)

        # if this image has not already been processed (injected at a later
        # stage in the sphinx process); try processing it now
        if not img_key:
            # if this is an svg image, additional processing may also be
            # needed
            if is_svg:
                confluence_supported_svg(self.builder, node)

            if not asset_docname:
                asset_docname = self.docname

            img_key, dochost, img_path = \
                self.assets.process_image_node(
                    node, asset_docname, standalone=True)

        if not img_key:
            self.warn('unable to find image: ' + uri)
            raise nodes.SkipNode

    # extract height, width and scale values on this image
    height, hu = extract_length(node.get('height'))
    scale = node.get('scale')
    width, wu = extract_length(node.get('width'))

    # if a scale value is provided and a height/width is not set, attempt to
    # determine the size of the image so that we can apply a scale value on
    # the detected size values
    if scale and not height and not width:
        if internal_img:
            img_sz = get_image_size(img_path)
            if img_sz is None:
                self.warn('could not obtain image size; :scale: option is '
                    'ignored for ' + img_path)
            else:
                # only the width is seeded from the detected size
                width = img_sz[0]
                wu = 'px'
        else:
            self.warn('cannot not obtain image size for external image; '
                ':scale: option is ignored for ' + node['uri'])

    # apply scale factor to height/width fields
    if scale:
        if height:
            height = int(round(float(height) * scale / 100))
        if width:
            width = int(round(float(width) * scale / 100))

    # confluence only supports pixel sizes and percentage sizes in select
    # cases (e.g. applying a percentage width for an attached image can
    # result in an macro render error) -- adjust any other unit type (if
    # possible) to an acceptable pixel/percentage length
    if height:
        height = convert_length(height, hu)
        if height is None:
            self.warn('unsupported unit type for confluence: ' + hu)
    if width:
        width = convert_length(width, wu)
        if width is None:
            self.warn('unsupported unit type for confluence: ' + wu)

    # disable height/width entries for attached svgs as using these
    # attributes can result in a "broken image" rendering; instead, we will
    # track any desired height/width entry and inject them when publishing
    if internal_img and is_svg and (height or width):
        height = None
        hu = None
        width = None
        wu = None

    # [sphinx-gallery] create "thumbnail" images for sphinx-gallery
    #
    # If a sphinx-gallery-specific class type is detected for an image,
    # assume there is a desire for thumbnail-like images. Images are then
    # restricted with a specific height (a pattern observed when restricting
    # images to a smaller size with a Confluence editor). Although, if the
    # detected image size is smaller than our target, ignore any forced size
    # changes.
    if height is None and width is None and internal_img and not is_svg:
        # NOTE(review): this reads the 'class' attribute; docutils nodes
        # commonly expose directive classes as 'classes' -- confirm which
        # attribute sphinx-gallery images actually carry
        if 'sphx-glr-multi-img' in node.get('class', []):
            if not img_sz:
                img_sz = get_image_size(img_path)

            # force the height when the size is unknown or taller than 250
            if not img_sz or img_sz[1] > 250:
                height = '250'
                hu = 'px'

    # forward image options
    opts = {}
    opts['dochost'] = dochost
    opts['height'] = height
    opts['hu'] = hu
    opts['key'] = img_key
    opts['width'] = width
    opts['wu'] = wu

    self._visit_image(node, opts)
def _convert_to_data_uri(filename):
    # type: (str) -> str
    """Return the contents of ``filename`` as a base64 ``data:`` URI.

    The MIME type is taken from ``guess_mimetype`` and falls back to ``*``
    when it cannot be determined.
    """
    # read through a context manager so the file handle is always closed
    with open(filename, "rb") as f:
        payload = f.read()

    # b64encode returns bytes; decode it so the URI does not embed a
    # ``b'...'`` representation on Python 3
    encoded = base64.b64encode(payload).decode('ascii')
    mimetype = guess_mimetype(filename, default='*')
    return 'data:{};base64,{}'.format(mimetype, encoded)
def confluence_supported_svg(builder, node):
    """
    process an image node and ensure confluence-supported svg (if applicable)

    SVGs have some limitations when being presented on a Confluence instance.
    The following have been observed issues:

    1) If an SVG file does not have an XML declaration, Confluence will fail
       to render an image.
    2) If an `ac:image` macro is applied custom width/height values on an
       SVG, Confluence will fail to render the image.

    This call will process a provided image node and ensure an SVG is in a
    ready state for publishing. If a node is not an SVG, this method will do
    nothing.

    To support custom width/height fields for an SVG image, the image file
    itself will be modified to the expected lengths. For any hints in the
    documentation using width/height or scale, the desired width and height
    fields of an image will be calculated and replaced/injected into the SVG
    image.

    Any SVG files which do not have an XML declaration will have one
    injected.

    When a modified SVG is needed, it is written under the builder's image
    directory (named after the SHA-256 of its content) and the node is
    updated to reference it, with its height/scale/width attributes removed.

    Args:
        builder: the builder
        node: the image node to check
    """

    uri = node['uri']

    # ignore external/embedded images
    if uri.find('://') != -1 or uri.startswith('data:'):
        return

    # invalid uri/path
    uri_abspath = find_env_abspath(builder.env, builder.outdir, uri)
    if not uri_abspath:
        return

    # ignore non-svgs
    mimetype = guess_mimetype(uri_abspath)
    if mimetype != 'image/svg+xml':
        return

    try:
        with open(uri_abspath, 'rb') as f:
            svg_data = f.read()
    except (IOError, OSError) as err:
        builder.warn('error reading svg: %s' % err)
        return

    modified = False
    svg_root = xml_et.fromstring(svg_data)

    # determine (if possible) the svgs desired width/height
    svg_height = None
    if 'height' in svg_root.attrib:
        svg_height = svg_root.attrib['height']

    svg_width = None
    if 'width' in svg_root.attrib:
        svg_width = svg_root.attrib['width']

    # try to fallback on the viewbox attribute
    viewbox = False
    if svg_height is None or svg_width is None:
        if 'viewBox' in svg_root.attrib:
            try:
                # only a viewBox of exactly four single-space separated
                # values is accepted; anything else is ignored
                _, _, svg_width, svg_height = \
                    svg_root.attrib['viewBox'].split(' ')
                viewbox = True
            except ValueError:
                pass

    # if tracking an svg width/height, ensure the sizes are in pixels
    if svg_height:
        svg_height, svg_height_units = extract_length(svg_height)
        svg_height = convert_length(svg_height, svg_height_units, pct=False)
    if svg_width:
        svg_width, svg_width_units = extract_length(svg_width)
        svg_width = convert_length(svg_width, svg_width_units, pct=False)

    # extract length/scale properties from the node
    height, hu = extract_length(node.get('height'))
    scale = node.get('scale')
    width, wu = extract_length(node.get('width'))

    # if a percentage is detected, ignore these lengths when attempting to
    # perform any adjustments; percentage hints for internal images will be
    # managed with container tags in the translator
    if hu == '%':
        height = None
        hu = None
    if wu == '%':
        width = None
        wu = None

    # confluence can have difficulty rendering svgs with only a viewbox entry;
    # if a viewbox is used, use it for the height/width if these options have
    # not been explicitly configured on the directive
    if viewbox and not height and not width:
        height = svg_height
        width = svg_width

    # if only one size is set, fetch (and scale) the other
    if width and not height:
        if svg_height and svg_width:
            # keep the svg's aspect ratio
            height = float(width) / svg_width * svg_height
        else:
            height = width
        hu = wu

    if height and not width:
        if svg_height and svg_width:
            # keep the svg's aspect ratio
            width = float(height) / svg_height * svg_width
        else:
            width = height
        wu = hu

    # if a scale value is provided and a height/width is not set, attempt to
    # determine the size of the image so that we can apply a scale value on
    # the detected size values
    if scale:
        if not height and svg_height:
            height = svg_height
            hu = 'px'
        if not width and svg_width:
            width = svg_width
            wu = 'px'

    # apply scale factor to height/width fields
    if scale:
        if height:
            height = int(round(float(height) * scale / 100))
        if width:
            width = int(round(float(width) * scale / 100))

    # confluence only supports pixel sizes -- adjust any other unit type
    # (if possible) to a pixel length
    if height:
        height = convert_length(height, hu, pct=False)
        if height is None:
            builder.warn('unsupported svg unit type for confluence: ' + hu)
    if width:
        width = convert_length(width, wu, pct=False)
        if width is None:
            builder.warn('unsupported svg unit type for confluence: ' + wu)

    # if we have a height/width to apply, adjust the svg
    if height and width:
        svg_root.attrib['height'] = str(height)
        svg_root.attrib['width'] = str(width)
        svg_data = xml_et.tostring(svg_root)
        modified = True

    # ensure xml declaration exists
    if not svg_data.lstrip().startswith(b'<?xml'):
        svg_data = XML_DEC + b'\n' + svg_data
        modified = True

    # ignore svg file if no modifications are needed
    if not modified:
        return

    # name the generated file after its content so identical results are
    # written only once
    fname = sha256(svg_data).hexdigest() + '.svg'
    outfn = os.path.join(builder.outdir, builder.imagedir, 'svgs', fname)

    # write the new svg file (if needed)
    if not os.path.isfile(outfn):
        logger.verbose('generating compatible svg of: %s' % uri)
        logger.verbose('generating compatible svg to: %s' % outfn)

        ensuredir(os.path.dirname(outfn))
        try:
            with open(outfn, 'wb') as f:
                f.write(svg_data)
        except (IOError, OSError) as err:
            builder.warn('error writing svg: %s' % err)
            return

    # replace the required node attributes
    node['uri'] = outfn

    if 'height' in node:
        del node['height']
    if 'scale' in node:
        del node['scale']
    if 'width' in node:
        del node['width']
def test_guess_mimetype(testroot):
    """Check detection by filename, by content, and their priority."""
    gif = (testroot / GIF_FILENAME).bytes()
    png = (testroot / PNG_FILENAME).bytes()
    pdf = (testroot / PDF_FILENAME).bytes()
    txt = (testroot / TXT_FILENAME).bytes()

    # each case: (positional args, keyword args, expected result)
    cases = [
        # guess by filename
        (('img.png',), {}, 'image/png'),
        (('img.jpg',), {}, 'image/jpeg'),
        (('img.txt',), {}, None),
        (('img.txt',), {'default': 'text/plain'}, 'text/plain'),
        (('no_extension',), {}, None),
        (('IMG.PNG',), {}, 'image/png'),
        # guess by content
        ((), {'content': gif}, 'image/gif'),
        ((), {'content': png}, 'image/png'),
        ((), {'content': pdf}, None),
        ((), {'content': txt}, None),
        ((), {'content': txt, 'default': 'text/plain'}, 'text/plain'),
        # the priority of params: filename > content > default
        (('img.png',), {'content': gif, 'default': 'text/plain'},
         'image/png'),
        (('no_extension',), {'content': gif, 'default': 'text/plain'},
         'image/gif'),
        (('no_extension',), {'content': txt, 'default': 'text/plain'},
         'text/plain'),
    ]

    for args, kwargs, expected in cases:
        actual = guess_mimetype(*args, **kwargs)
        if expected is None:
            assert actual is None
        else:
            assert actual == expected