Beispiel #1
0
    def __init__(self, app):
        super(ConfluenceBuilder, self).__init__(app)

        self.cache_doctrees = {}
        self.cloud = False
        self.file_suffix = '.conf'
        self.info = ConfluenceLogger.info
        self.link_suffix = None
        self.master_doc_page_id = None
        self.metadata = {}
        self.nav_next = {}
        self.nav_prev = {}
        self.omitted_docnames = []
        self.publish_allowlist = []
        self.publish_denylist = []
        self.publish_docnames = []
        self.publisher = ConfluencePublisher()
        self.secnumbers = {}
        self.verbose = ConfluenceLogger.verbose
        self.warn = ConfluenceLogger.warn
        self._original_get_doctree = None

        # state tracking is set at initialization (not cleanup) so its content's
        # can be checked/validated on after the builder has executed (testing)
        ConfluenceState.reset()
Beispiel #2
0
    def publish_doc(self, docname, output):
        conf = self.config
        title = ConfluenceState.title(docname)

        parent_id = None
        if self.config.master_doc and self.config.confluence_page_hierarchy:
            if self.config.master_doc != docname:
                parent = ConfluenceState.parentDocname(docname)
                parent_id = ConfluenceState.uploadId(parent)
        if not parent_id:
            parent_id = self.parent_id

        data = {
            'content': output,
            'labels': [],
        }

        if self.config.confluence_global_labels:
            data['labels'].extend(self.config.confluence_global_labels)

        metadata = self.metadata[docname]
        if 'labels' in metadata:
            data['labels'].extend([v for v in metadata['labels']])

        uploaded_id = self.publisher.storePage(title, data, parent_id)
        ConfluenceState.registerUploadId(docname, uploaded_id)

        if self.config.master_doc == docname:
            self.master_doc_page_id = uploaded_id

        if conf.confluence_purge and self.legacy_pages is None:
            if conf.confluence_purge_from_master and self.master_doc_page_id:
                baseid = self.master_doc_page_id
            else:
                baseid = self.parent_id

            # if no base identifier and dry running, ignore legeacy page
            # searching as there is no initial master document to reference
            # against
            if (conf.confluence_purge_from_master
                    and conf.confluence_publish_dryrun and not baseid):
                self.legacy_pages = []
            elif self.config.confluence_adv_aggressive_search is True:
                self.legacy_pages = self.publisher.getDescendantsCompat(baseid)
            else:
                self.legacy_pages = self.publisher.getDescendants(baseid)

            # only populate a list of possible legacy assets when a user is
            # configured to check or push assets to the target space
            asset_override = conf.confluence_asset_override
            if asset_override is None or asset_override:
                for legacy_page in self.legacy_pages:
                    attachments = self.publisher.getAttachments(legacy_page)
                    self.legacy_assets[legacy_page] = attachments

        if conf.confluence_purge:
            if uploaded_id in self.legacy_pages:
                self.legacy_pages.remove(uploaded_id)
Beispiel #3
0
def intern_uri_anchor_value(docname, refuri):
    """
    determine the anchor value for an internal uri point

    This call helps determine the anchor value to use for a link to an anchor
    for an internal document. The anchor value will be parsed out of the
    provided URI checked to see if a target entry already exists (e.g. if a
    header with a preconfigured identifier can be used). If so, the target value
    will be provided. If not, the parsed/raw anchor value will be returned.

    Args:
        docname: the docname of the page to link to
        refuri: the uri

    Returns:
        the encoded text
    """

    anchor_value = None
    if '#' in refuri:
        anchor = refuri.split('#')[1]
        target_name = '{}#{}'.format(docname, anchor)

        # check if this target is reachable without an anchor; if so, use
        # the identifier value instead
        target = ConfluenceState.target(target_name)
        if target:
            anchor_value = target
        else:
            anchor_value = anchor

        anchor_value = encode_storage_format(anchor_value)

    return anchor_value
Beispiel #4
0
    def _register_doctree_title_targets(self, docname, doctree):
        """
        register title targets for a doctree

        Compiles a list of title targets which references can link against. This
        tracked expected targets for sections which are automatically generated
        in a rendered Confluence instance.

        Args:
            docname: the docname of the doctree
            doctree: the doctree to search for targets
        """

        doc_used_names = {}
        secnumbers = self.env.toc_secnumbers.get(docname, {})

        for node in doctree.traverse(nodes.title):
            if isinstance(node.parent, nodes.section):
                section_node = node.parent
                if 'ids' in section_node:
                    target = ''.join(node.astext().split())

                    # when confluence has a header that contains a link, the
                    # automatically assigned identifier removes any underscores
                    if node.next_node(addnodes.pending_xref):
                        target = target.replace('_', '')

                    if self.add_secnumbers:
                        anchorname = '#' + target
                        if anchorname not in secnumbers:
                            anchorname = ''

                        secnumber = secnumbers.get(anchorname)
                        if secnumber:
                            target = ('.'.join(map(str, secnumber)) +
                                      self.secnumber_suffix + target)

                    section_id = doc_used_names.get(target, 0)
                    doc_used_names[target] = section_id + 1
                    if section_id > 0:
                        target = '{}.{}'.format(target, section_id)

                    for id in section_node['ids']:
                        id = '{}#{}'.format(docname, id)
                        ConfluenceState.registerTarget(id, target)
    def test_parent_registration(self):
        root_doc = ConfluenceState.parentDocname('toctree')
        self.assertIsNone(root_doc, 'root toctree has a parent')

        parent_doc = ConfluenceState.parentDocname('toctree-doc1')
        self.assertEqual(parent_doc, 'toctree',
                         'toctree-doc1 parent is not root toctree')

        parent_doc = ConfluenceState.parentDocname('toctree-doc2')
        self.assertEqual(parent_doc, 'toctree',
                         'toctree-doc2 parent is not root toctree')

        parent_doc = ConfluenceState.parentDocname('toctree-doc3')
        self.assertEqual(parent_doc, 'toctree',
                         'toctree-doc3 parent is not root toctree')

        parent_doc = ConfluenceState.parentDocname('toctree-doc2a')
        self.assertEqual(parent_doc, 'toctree-doc2',
                         'toctree-doc2a parent is not toctree-doc2')
Beispiel #6
0
    def process_tree_structure(self, ordered, docname, traversed, depth=0):
        omit = False
        max_depth = self.config.confluence_max_doc_depth
        if max_depth is not None and depth > max_depth:
            omit = True
            self.omitted_docnames.append(docname)

        if not omit:
            ordered.append(docname)

        modified = False
        doctree = self.env.get_doctree(docname)
        for toctreenode in doctree.traverse(addnodes.toctree):
            if not omit and max_depth is not None:
                if (toctreenode['maxdepth'] == -1
                        or depth + toctreenode['maxdepth'] > max_depth):
                    new_depth = max_depth - depth
                    assert new_depth >= 0
                    toctreenode['maxdepth'] = new_depth
            movednodes = []
            for child in toctreenode['includefiles']:
                if child not in traversed:
                    ConfluenceState.registerParentDocname(child, docname)
                    traversed.append(child)

                    children = self.process_tree_structure(
                        ordered, child, traversed, depth + 1)
                    if children:
                        movednodes.append(children)
                        self._fix_std_labels(child, docname)

            if movednodes:
                modified = True
                toctreenode.replace_self(movednodes)
                toctreenode.parent['classes'].remove('toctree-wrapper')

        if omit:
            container = addnodes.start_of_file(docname=docname)
            container.children = doctree.children
            return container
        elif modified:
            self.env.resolve_references(doctree, docname, self)
    def _register_doctree_title_targets(self, docname, doctree):
        """
        register title targets for a doctree

        Compiles a list of title targets which references can link against. This
        tracked expected targets for sections which are automatically generated
        in a rendered Confluence instance.

        Args:
            docname: the docname of the doctree
            doctree: the doctree to search for targets
        """

        doc_used_names = {}
        secnumbers = self.env.toc_secnumbers.get(self.config.master_doc, {})

        for node in doctree.traverse(nodes.title):
            if isinstance(node.parent, nodes.section):
                section_node = node.parent
                if 'ids' in section_node:
                    title_name = ''.join(node.astext().split())

                    for id in section_node['ids']:
                        target = title_name

                        anchorname = '%s/#%s' % (docname, id)
                        if anchorname not in secnumbers:
                            anchorname = '%s/' % id

                        if self.add_secnumbers:
                            secnumber = secnumbers.get(anchorname)
                            if secnumber:
                                target = ('.'.join(map(str, secnumber)) +
                                          self.secnumber_suffix + target)

                        section_id = doc_used_names.get(target, 0)
                        doc_used_names[target] = section_id + 1
                        if section_id > 0:
                            target = '{}.{}'.format(target, section_id)

                        ConfluenceState.registerTarget(anchorname, target)
    def _process_root_document(self):
        docname = self.config.master_doc

        # Extract the title from the root document as it will be used to decide
        # which Confluence page the document will be published to.
        if (self.config.confluence_title_overrides
                and docname in self.config.confluence_title_overrides):
            doctitle = self.config.confluence_title_overrides[docname]
        else:
            doctree = self.env.get_doctree(docname)
            doctitle = self._parse_doctree_title(docname, doctree)
            if not doctitle:
                return None

        # register the title for the root document (for references, assets, ...)
        ConfluenceState.registerTitle(docname, doctitle, self.config)

        # register the root document for publishing
        self.publish_docnames.append(docname)

        return doctitle
Beispiel #9
0
    def publish_asset(self, key, docname, output, type, hash):
        conf = self.config
        publisher = self.publisher

        title = ConfluenceState.title(docname)
        page_id = ConfluenceState.uploadId(docname)

        if not page_id:
            # A page identifier may not be tracked in cases where only a subset
            # of documents are published and the target page an asset will be
            # published to was not part of the request. In this case, ask the
            # Confluence instance what the target page's identifier is.
            page_id, _ = publisher.getPage(title)
            if page_id:
                ConfluenceState.registerUploadId(docname, page_id)
            else:
                ConfluenceLogger.warn('cannot publish asset since publishing '
                                      'point cannot be found ({}): {}'.format(
                                          key, docname))
                return

        if conf.confluence_asset_override is None:
            # "automatic" management -- check if already published; if not, push
            attachment_id = publisher.storeAttachment(page_id, key, output,
                                                      type, hash)
        elif conf.confluence_asset_override:
            # forced publishing of the asset
            attachment_id = publisher.storeAttachment(page_id,
                                                      key,
                                                      output,
                                                      type,
                                                      hash,
                                                      force=True)

        if attachment_id and conf.confluence_purge:
            if page_id in self.legacy_assets:
                legacy_asset_info = self.legacy_assets[page_id]
                if attachment_id in legacy_asset_info:
                    legacy_asset_info.pop(attachment_id, None)
def process_doclink(config, refuri):
    """
    process a generated index link entry

    This call is used to process a index-generated link value -- to translate a
    docname/anchor value to a final document title/anchor value. This is to
    better prepare an index for the template to easily work with.

    Args:
        config: the active configuration
        refuri: the uri

    Returns:
        the document's title and anchor value
    """

    docname = posixpath.normpath(os.path.splitext(refuri.split('#')[0])[0])
    doctitle = ConfluenceState.title(docname)
    anchor_value = intern_uri_anchor_value(docname, refuri)

    return doctitle, anchor_value
    def test_config_hierarchy_parent_registration(self):
        ConfluenceState.reset()
        self.build(self.dataset, relax=True)

        # root toctree should not have a parent
        root_doc = ConfluenceState.parent_docname('index')
        self.assertIsNone(root_doc)

        # check various documents for expected parents
        parent_doc = ConfluenceState.parent_docname('toctree-doc1')
        self.assertEqual(parent_doc, 'index')

        parent_doc = ConfluenceState.parent_docname('toctree-doc2')
        self.assertEqual(parent_doc, 'index')

        parent_doc = ConfluenceState.parent_docname('toctree-doc3')
        self.assertEqual(parent_doc, 'index')

        parent_doc = ConfluenceState.parent_docname('toctree-doc2a')
        self.assertEqual(parent_doc, 'toctree-doc2')
def build_intersphinx(builder):
    """
    build intersphinx information from the state of the builder

    Attempt to build a series of entries for an intersphinx inventory resource
    for Confluence builder generated content. This is only supported after
    processing a publishing event where page identifiers are cached to build URI
    entries.

    Args:
        builder: the builder
    """
    def escape(string):
        return re.sub("\\s+", ' ', string)

    if builder.cloud:
        pages_part = 'pages/{}/'
    else:
        pages_part = 'pages/viewpage.action?pageId={}'

    with open(path.join(builder.outdir, INVENTORY_FILENAME), 'wb') as f:
        # header
        f.write(('# Sphinx inventory version 2\n'
                 '# Project: %s\n'
                 '# Version: %s\n'
                 '# The remainder of this file is compressed using zlib.\n' %
                 (escape(builder.env.config.project),
                  escape(builder.env.config.version))).encode())

        # contents
        compressor = zlib.compressobj(9)

        for domainname, domain in sorted(builder.env.domains.items()):
            if domainname == 'std':
                for name, dispname, typ, docname, raw_anchor, prio in sorted(
                        domain.get_objects()):

                    page_id = ConfluenceState.uploadId(docname)
                    if not page_id:
                        continue

                    target_name = '{}#{}'.format(docname, raw_anchor)
                    target = ConfluenceState.target(target_name)

                    if raw_anchor and target:
                        title = ConfluenceState.title(docname)
                        anchor = 'id-' + title + '-' + target
                        anchor = anchor.replace(' ', '')

                        # confluence will convert quotes to right-quotes for
                        # anchor values; replace and encode the anchor value
                        anchor = anchor.replace('"', '”')
                        anchor = anchor.replace("'", '’')
                        anchor = requests.utils.quote(anchor)
                    else:
                        anchor = ''

                    uri = pages_part.format(page_id)
                    if anchor:
                        uri += '#' + anchor
                    if dispname == name:
                        dispname = '-'
                    entry = ('%s %s:%s %s %s %s\n' %
                             (name, domainname, typ, prio, uri, dispname))
                    ConfluenceLogger.verbose('(intersphinx) ' + entry.strip())
                    f.write(compressor.compress(entry.encode('utf-8')))

        f.write(compressor.flush())
Beispiel #13
0
 def tearDown(self):
     ConfluenceState.reset()
Beispiel #14
0
 def _register_title(self, title):
     with skip_warningiserror():
         return ConfluenceState.register_title('mock', title, self.config)
Beispiel #15
0
 def setUp(self):
     ConfluenceState.reset()
     self.config = MockedConfig()
Beispiel #16
0
    def prepare_writing(self, docnames):
        ordered_docnames = []
        traversed = [self.config.master_doc]

        # prepare caching doctree hook
        #
        # We'll temporarily override the environment's 'get_doctree' method to
        # allow this extension to manipulate the doctree for a document inside
        # the pre-writing stage to also take effect in the writing stage.
        self._original_get_doctree = self.env.get_doctree
        self.env.get_doctree = self._get_doctree

        # process the document structure of the master document, allowing:
        #  - populating a publish order to ensure parent pages are created first
        #     (when using hierarchy mode)
        #  - squash pages which exceed maximum depth (if configured with a max
        #     depth value)
        self.process_tree_structure(ordered_docnames, self.config.master_doc,
                                    traversed)

        # track relations between accepted documents
        #
        # Prepares a relation mapping between each non-orphan documents which
        # can be used by navigational elements.
        prevdoc = ordered_docnames[0] if ordered_docnames else None
        for docname in ordered_docnames[1:]:
            self.nav_prev[docname] = self.get_relative_uri(docname, prevdoc)
            self.nav_next[prevdoc] = self.get_relative_uri(prevdoc, docname)
            prevdoc = docname

        # add orphans (if any) to the publish list
        ordered_docnames.extend(x for x in docnames if x not in traversed)

        for docname in ordered_docnames:
            doctree = self.env.get_doctree(docname)

            # acquire title from override (if any), or parse first title entity
            if (self.config.confluence_title_overrides
                    and docname in self.config.confluence_title_overrides):
                doctitle = self.config.confluence_title_overrides[docname]
            else:
                doctitle = self._parse_doctree_title(docname, doctree)

            # only register title/track for publishing if there is a title
            # value that can be applied to this document
            if doctitle:
                secnumbers = self.env.toc_secnumbers.get(docname, {})
                if self.add_secnumbers and secnumbers.get(''):
                    doctitle = ('.'.join(map(str, secnumbers[''])) +
                                self.secnumber_suffix + doctitle)

                doctitle = ConfluenceState.registerTitle(
                    docname, doctitle, self.config)

                # only publish documents that sphinx asked to prepare
                if docname in docnames:
                    self.publish_docnames.append(docname)

            # track the toctree depth for a document, which a translator can
            # use as a hint when dealing with max-depth capabilities
            toctree = first(doctree.traverse(addnodes.toctree))
            if toctree and toctree.get('maxdepth') > 0:
                ConfluenceState.registerToctreeDepth(docname,
                                                     toctree.get('maxdepth'))

            # register title targets for references
            self._register_doctree_title_targets(docname, doctree)

            # post-prepare a ready doctree
            self._prepare_doctree_writing(docname, doctree)

        # Scan for assets that may exist in the documents to be published. This
        # will find most if not all assets in the documentation set. The
        # exception is assets which may be finalized during a document's post
        # transformation stage (e.g. embedded images are converted into real
        # images in Sphinx, which is then provided to a translator). Embedded
        # images are detected during an 'doctree-resolved' hook (see __init__).
        self.assets.process(ordered_docnames)