Example #1
0
    def createHTML(self, parent, token, page):
        """Render folder-binned content as headed, three-column link lists."""
        binned = self.extension.binContent(page, token['location'],
                                           ContentExtension.FOLDER)
        source_links = self.extension.get('source_links')

        for heading_text in sorted(binned.keys()):
            entries = binned[heading_text]
            if heading_text:
                # Optional section heading; linked when a source link exists
                level_tag = 'h{:d}'.format(int(token['level']))
                h = html.Tag(parent, level_tag, class_='moose-a-to-z')
                if heading_text in source_links:
                    target = self.translator.findPage(source_links[heading_text])
                    dest = target.relativeDestination(page)
                    html.Tag(h, 'a', href=dest, string=str(heading_text) + ' ')
                else:
                    html.String(h, content=str(heading_text))

            # Three columns per row of links
            row = html.Tag(parent, 'div', class_='row')
            for group in mooseutils.make_chunks(list(entries), 3):
                column = html.Tag(row, 'div', class_='col s12 m6 l4')
                ul = html.Tag(column, 'ul', class_='moose-a-to-z')
                for text, path, _ in group:
                    item = html.Tag(ul, 'li')
                    html.Tag(item, 'a', href=path,
                             string=str(text.replace('.md', '')))
Example #2
0
    def _run(self, nodes, container, target, num_threads=1):
        """Helper function for running in parallel using Pipe.

        Inputs:
            nodes: list of page objects to distribute across processes.
            container: optional mapping; when not None, per-page output is
                stored under the page uid. TODO confirm it is a Manager dict.
            target: callable executed in each worker; receives (chunk, sender)
                and is expected to send (uid, attributes, out) triples.
            num_threads[int]: number of worker processes to spawn.
        """

        # Create connection objects representing the receiver and sender ends of the pipe.
        receivers = []
        # Shuffle so chunks get a roughly even mix of cheap/expensive pages
        random.shuffle(nodes)
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            r, s = self._ctx.Pipe(False)
            receivers.append(r)
            self._ctx.Process(target=target, args=(chunk, s)).start()
            s.close() # need to close this instance because a copy was sent to the Process() object

        # Iterate through the list of ready connection objects, i.e., those that either have data to
        # receive or their corresponding sender connection has been closed, until all are removed
        # from the list of pending connections. If there is no data to receive and the sender has
        # been closed, then an EOFError is raised indicating that the receiver can be removed.
        while receivers:
            for r in [r for r in receivers if r.poll() or r.closed]:
                try:
                    data = r.recv()
                except EOFError:
                    receivers.remove(r)
                else:
                    # Each message is a list of (uid, attributes, out) triples
                    for uid, attributes, out in data:
                        self._getPage(uid).attributes.update(attributes)
                        if container is not None:
                            container[uid] = out
Example #3
0
    def build(self, num_threads=multiprocessing.cpu_count()):
        """Build every page of the tree in parallel, then copy support files."""
        if self._root is None:
            raise mooseutils.MooseException(
                "The 'init' method must be called prior to build.")

        # The configuration must have produced a proper tree before building
        if not isinstance(self._root, anytree.NodeMixin):
            raise TypeError(
                "The 'buildNodes' method must return a anytree.NodeMixin object."
            )

        # Fan the pages out across worker processes
        workers = []
        for chunk in mooseutils.make_chunks(list(self), num_threads):
            worker = multiprocessing.Process(target=self.buildPages, args=(chunk,))
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        self.copyFiles()
Example #4
0
    def _run(self, nodes, container, target, num_threads=1, prefix='Running'):
        """Helper function for running in parallel using Pipe.

        Inputs:
            nodes: list of page objects distributed across worker processes.
            container: optional mapping; when not None, per-page output is
                stored under the page uid.
            target: callable run in each worker; receives (chunk, sender) and
                sends lists of (uid, attributes, out) triples.
            num_threads[int]: number of worker processes.
            prefix[str]: label used in the log messages.
        """

        # Time the process
        t = time.time()
        LOG.info('%s using %s threads...', prefix, num_threads)

        # Tokenization
        jobs = []
        conn1, conn2 = self._ctx.Pipe(False)
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            p = self._ctx.Process(target=target, args=(chunk, conn2))
            p.start()
            jobs.append(p)

        def _consume():
            """Receive one message and apply the attribute/output updates."""
            for uid, attributes, out in conn1.recv():
                for node in nodes:
                    if uid == node.uid:
                        node.attributes.update(attributes)
                        break
                if container is not None:
                    container[uid] = out

        while any(job.is_alive() for job in jobs):
            if conn1.poll():
                _consume()

        # BUGFIX: messages sent just before a worker exits can still be queued
        # in the pipe after the liveness loop ends; drain them so the last
        # results are not silently dropped.
        while conn1.poll():
            _consume()

        LOG.info('Finished %s [%s sec.]', prefix, time.time() - t)
Example #5
0
    def execute(self, nodes, num_threads=1, read=True, tokenize=True, render=True, write=True):
        """Run read/tokenize/render/write phases over *nodes* in parallel.

        Inputs:
            nodes: list of page objects to process.
            num_threads[int]: number of worker processes.
            read/tokenize/render/write[bool]: enable the respective phases.
        """
        # Lazily create the shared (Manager) result containers on first use
        if read and not self._page_content:
            self._page_content = self._manager.dict({p.uid: None for p in self._page_objects})
        if tokenize and not self._page_ast:
            self._page_ast = self._manager.dict({p.uid: None for p in self._page_objects})
        if render and not self._page_result:
            self._page_result = self._manager.dict({p.uid: None for p in self._page_objects})

        # Initialize a manager object dictionary with the current attributes of Page objects
        page_attributes = self._manager.dict({p.uid: p.attributes for p in self._page_objects})

        # BUGFIX: clamp the thread count; mooseutils.make_chunks cannot yield
        # more chunks than there are nodes, so Barrier(num_threads) would have
        # more parties than worker processes and every barrier.wait() would
        # deadlock (matches the guard used by the other execute variants).
        if num_threads > len(nodes):
            num_threads = len(nodes)

        # Distribute nodes to threads and process the execute methods on each
        jobs = []
        random.shuffle(nodes)
        args = (self._ctx.Barrier(num_threads), page_attributes, read, tokenize, render, write)
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            p = self._ctx.Process(target=self._target, args=(chunk, *args))
            jobs.append(p)
            p.start()

        for job in jobs:
            job.join()

        # This is needed to maintain the page attributes during live serving. In parallel, when the
        # Executioner executes each process created above gets a copy of self._page_objects. Each
        # process is running the _target method and keeping the attributes of the pages up to date
        # across the processes. This call updates the attributes of the original pages that
        # were copied when the processes start. Thus, when new processes are started during a
        # live reload the attributes are correct when the copy is performed again for the new
        # processes.
        self._updateAttributes(page_attributes)
Example #6
0
    def execute(self, nodes, num_threads=1):
        """Perform the translation with multiprocessing.

        Inputs:
            nodes: list of page objects to process.
            num_threads[int]: number of worker processes.
        """
        # BUGFIX: clamp the thread count; mooseutils.make_chunks cannot yield
        # more chunks than there are nodes, so Barrier(num_threads) would have
        # more parties than workers and every barrier.wait() would deadlock
        # (same guard as the sys.version_info variant of this method).
        if num_threads > len(nodes):
            num_threads = len(nodes)

        barrier = multiprocessing.Barrier(num_threads)
        manager = multiprocessing.Manager()
        page_attributes = manager.list([None] * len(self._page_objects))

        # Initialize the page attributes container using the existing list of Page node objects
        for i in range(len(page_attributes)):
            Executioner.setMutable(self._page_objects[i], True)
            page_attributes[i] = self._page_objects[i].attributes
            Executioner.setMutable(self._page_objects[i], False)

        jobs = []
        random.shuffle(nodes)
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            p = multiprocessing.Process(target=self._target,
                                        args=(chunk, barrier, page_attributes))
            jobs.append(p)
            p.start()

        for job in jobs:
            job.join()

        # This is needed to maintain the page attributes during live serving. In parallel, when the
        # Executioner executes each process created above gets a copy of self._page_objects. Each
        # process is running the _target method and keeping the attributes of the pages up to date
        # across the processes. This call updates the attributes of the original pages that
        # were copied when the processes start. Thus, when new processes are started during a
        # live reload the attributes are correct when the copy is performed again for the new
        # processes.
        self._updateAttributes(page_attributes)
Example #7
0
    def _run(self, nodes, container, target, num_threads=1, prefix='Running'):
        """Helper function for running in parallel using Pipe.

        Inputs:
            nodes: list of page objects distributed across worker processes.
            container: optional mapping; when not None, per-page output is
                stored under the page uid.
            target: callable run in each worker; receives (chunk, sender) and
                sends lists of (uid, attributes, out) triples.
            num_threads[int]: number of worker processes.
            prefix[str]: label used in the log messages.
        """

        # Time the process
        t = time.time()
        LOG.info('%s using %s threads...', prefix, num_threads)

        # Tokenization
        jobs = []
        conn1, conn2 = multiprocessing.Pipe(False)
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            p = multiprocessing.Process(target=target, args=(chunk, conn2))
            p.start()
            jobs.append(p)

        def _consume():
            """Receive one message and apply the attribute/output updates."""
            for uid, attributes, out in conn1.recv():
                node = self._page_objects[uid]
                Executioner.setMutable(node, True)
                node.attributes.update(attributes)
                Executioner.setMutable(node, False)

                if container is not None:
                    container[uid] = out

        while any(job.is_alive() for job in jobs):
            if conn1.poll():
                _consume()

        # BUGFIX: messages sent just before a worker exits can still be queued
        # in the pipe after the liveness loop ends; drain them so the last
        # results are not silently dropped.
        while conn1.poll():
            _consume()

        LOG.info('Finished %s [%s sec.]', prefix, time.time() - t)
Example #8
0
    def createMaterialize(self, parent, token, page):
        """Render an alphabetized A-to-Z index of source pages with jump buttons.

        Inputs:
            parent: parent html.Tag to append output to.
            token: token providing 'location', 'level', and 'buttons' settings.
            page: the page being rendered (used for relative destinations).
        """

        # Initialized alphabetized storage
        headings = dict()
        # BUGFIX: the alphabet string was 'ABCDEFGHIJKLNMOPQRSTUVWXYZ' with
        # N and M transposed, so the letter bins were created out of order.
        for letter in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
            headings[letter] = dict()

        # Extract headings, default to filename if a heading is not found
        func = lambda n: n.local.startswith(token['location']) and isinstance(
            n, pages.Source)
        for node in self.translator.findPages(func):
            h_node = heading.find_heading(self.translator, node)
            if h_node is not None:
                r = html.Tag(None, 'span')
                self.renderer.render(r, h_node, page)
                key = r.text()
            else:
                r = None
                key = node.name

            letter = key[0].upper()
            headings[letter][key] = node.relativeDestination(page)

        # Buttons
        buttons = html.Tag(parent, 'div', class_='moose-a-to-z-buttons')
        if not token['buttons']:
            buttons.parent = None

        # Build lists
        for letter, items in headings.iteritems():
            id_ = uuid.uuid4()
            btn = html.Tag(buttons,
                           'a',
                           string=unicode(letter),
                           class_='btn moose-a-to-z-button',
                           href='#{}'.format(id_))

            # Letters without entries get a disabled button and no section
            if not items:
                btn.addClass('disabled')
                continue

            html.Tag(parent,
                     'h{}'.format(token['level']),
                     class_='moose-a-to-z',
                     id_=unicode(id_),
                     string=unicode(letter))

            row = html.Tag(parent, 'div', class_='row')

            links = [(text, href) for text, href in items.iteritems()]
            for chunk in mooseutils.make_chunks(links, 3):
                col = html.Tag(row, 'div', class_='col s12 m6 l4')
                ul = html.Tag(col, 'ul', class_='moose-a-to-z')
                for text, href in chunk:
                    li = html.Tag(ul, 'li')
                    html.Tag(li, 'a', href=href, string=unicode(text))
Example #9
0
    def createMaterialize(self, parent, token, page):
        """Render an alphabetized A-to-Z index of source pages with jump buttons.

        Inputs:
            parent: parent html.Tag to append output to.
            token: token providing 'location', 'level', and 'buttons' settings.
            page: the page being rendered (used for relative destinations).
        """

        # Initialized alphabetized storage
        headings = dict()
        # BUGFIX: the alphabet string was 'ABCDEFGHIJKLNMOPQRSTUVWXYZ' with
        # N and M transposed, so the letter bins were created out of order.
        for letter in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
            headings[letter] = dict()

        # Extract headings, default to filename if a heading is not found
        func = lambda n: n.local.startswith(token['location']) and isinstance(n, pages.Source)
        for node in self.translator.findPages(func):
            h_node = heading.find_heading(self.translator, node)
            if h_node is not None:
                r = html.Tag(None, 'span')
                self.renderer.render(r, h_node, page)
                key = r.text()
            else:
                r = None
                key = node.name

            letter = key[0].upper()
            headings[letter][key] = node.relativeDestination(page)

        # Buttons
        buttons = html.Tag(parent, 'div', class_='moose-a-to-z-buttons')
        if not token['buttons']:
            buttons.parent = None

        # Build lists
        for letter, items in headings.iteritems():
            id_ = uuid.uuid4()
            btn = html.Tag(buttons, 'a',
                           string=unicode(letter),
                           class_='btn moose-a-to-z-button',
                           href='#{}'.format(id_))

            # Letters without entries get a disabled button and no section
            if not items:
                btn.addClass('disabled')
                continue

            html.Tag(parent, 'h{}'.format(token['level']),
                     class_='moose-a-to-z',
                     id_=unicode(id_),
                     string=unicode(letter))

            row = html.Tag(parent, 'div', class_='row')

            links = [(text, href) for text, href in items.iteritems()]
            for chunk in mooseutils.make_chunks(links, 3):
                col = html.Tag(row, 'div', class_='col s12 m6 l4')
                ul = html.Tag(col, 'ul', class_='moose-a-to-z')
                for text, href in chunk:
                    li = html.Tag(ul, 'li')
                    html.Tag(li, 'a', href=href, string=unicode(text))
Example #10
0
    def execute(self, nodes, num_threads=1):
        """Perform parallel conversion using multiprocessing Pipe.

        Tokenizes all nodes (collecting ASTs/meta data through per-worker
        pipes), then renders them in a second parallel pass.
        """

        # make_chunks cannot yield more chunks than nodes, so clamp the count
        if num_threads > len(nodes):
            num_threads = len(nodes)

        # Tokenization
        jobs = []
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            conn1, conn2 = multiprocessing.Pipe(False)
            p = multiprocessing.Process(target=self.__tokenize_target,
                                        args=(chunk, conn2))
            p.start()
            jobs.append((p, conn1, conn2))

        # Finish the jobs and collect data from the Pipe
        # NOTE(review): once a conn1 is closed below, subsequent iterations of
        # this loop still call poll() on it while other jobs remain alive —
        # confirm this cannot raise on a closed connection.
        while any(job[0].is_alive() for job in jobs):
            for job, conn1, conn2 in jobs:
                if conn1.poll():
                    uid = conn1.recv()
                    if uid == ParallelPipe.PROCESS_FINISHED:
                        conn1.close()
                        job.join()
                        continue

                    # Assumes the worker always sends uid, tree, meta in this
                    # exact order as three consecutive messages
                    self._tree_data[uid] = conn1.recv()
                    self._meta_data[uid] = conn1.recv()

        self._ast_available = True

        # Rendering
        jobs = []
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            p = multiprocessing.Process(target=self.__render_target,
                                        args=(chunk, ))
            p.start()
            jobs.append(p)

        for job in jobs:
            job.join()
Example #11
0
    def execute(self, nodes, num_threads=1):
        """Perform parallel conversion using multiprocessing Pipe.

        Tokenizes all nodes (collecting ASTs/meta data through per-worker
        pipes), then renders them in a second parallel pass.
        """

        # make_chunks cannot yield more chunks than nodes, so clamp the count
        if num_threads > len(nodes):
            num_threads = len(nodes)

        # Tokenization
        jobs = []
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            conn1, conn2 = multiprocessing.Pipe(False)
            p = multiprocessing.Process(target=self.__tokenize_target, args=(chunk, conn2))
            p.start()
            jobs.append((p, conn1, conn2))

        # Finish the jobs and collect data from the Pipe
        # NOTE(review): once a conn1 is closed below, subsequent iterations of
        # this loop still call poll() on it while other jobs remain alive —
        # confirm this cannot raise on a closed connection.
        while any(job[0].is_alive() for job in jobs):
            for job, conn1, conn2 in jobs:
                if conn1.poll():
                    uid = conn1.recv()
                    if uid == ParallelPipe.PROCESS_FINISHED:
                        conn1.close()
                        job.join()
                        continue

                    # Assumes the worker always sends uid, tree, meta in this
                    # exact order as three consecutive messages
                    self._tree_data[uid] = conn1.recv()
                    self._meta_data[uid] = conn1.recv()

        self._ast_available = True

        # Rendering
        jobs = []
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            p = multiprocessing.Process(target=self.__render_target, args=(chunk,))
            p.start()
            jobs.append(p)

        for job in jobs:
            job.join()
Example #12
0
    def execute(self, nodes, num_threads=1):
        """Perform the translation with multiprocessing."""

        # No point spawning more workers than there are nodes to process
        if num_threads > len(nodes):
            num_threads = len(nodes)

        workers = []
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            worker = multiprocessing.Process(target=self.__target, args=(chunk,))
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()
Example #13
0
    def execute(self, nodes, num_threads=1):
        """Perform the translation with multiprocessing.

        Inputs:
            nodes: list of page objects to process.
            num_threads[int]: number of worker processes.
        """
        # BUGFIX: clamp the thread count; mooseutils.make_chunks cannot yield
        # more chunks than there are nodes, so Barrier(num_threads) would have
        # more parties than workers and every barrier.wait() would deadlock
        # (same guard as the sys.version_info variant of this method).
        if num_threads > len(nodes):
            num_threads = len(nodes)

        barrier = multiprocessing.Barrier(num_threads)
        manager = multiprocessing.Manager()
        page_attributes = manager.list([None]*len(self._page_objects))

        jobs = []
        random.shuffle(nodes)
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            p = multiprocessing.Process(target=self._target, args=(chunk, barrier, page_attributes))
            jobs.append(p)
            p.start()

        for job in jobs:
            job.join()
Example #14
0
    def execute(self, nodes, num_threads=1, read=True, tokenize=True, render=True, write=True):
        """Run read/tokenize/render/write phases over *nodes* in parallel.

        Inputs:
            nodes: list of page objects to process.
            num_threads[int]: number of worker processes.
            read/tokenize/render/write[bool]: enable the respective phases.
        """
        page_attributes = self._manager.dict({p.uid: p.attributes for p in nodes})

        # BUGFIX: clamp the thread count; mooseutils.make_chunks cannot yield
        # more chunks than there are nodes, so Barrier(num_threads) would have
        # more parties than worker processes and every barrier.wait() would
        # deadlock (matches the guard used by the other execute variants).
        if num_threads > len(nodes):
            num_threads = len(nodes)

        # Distribute nodes to Barrier objects and run the _target() method on each.
        jobs = []
        random.shuffle(nodes)
        args = (self._ctx.Barrier(num_threads), page_attributes, read, tokenize, render, write)
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            p = self._ctx.Process(target=self._target, args=(chunk, *args))
            jobs.append(p)
            p.start()

        for job in jobs:
            job.join()

        # The original copy of the 'self.page_objects' container needs to be updated to ensure that
        # the class instance retains this information over succesive invocations of this method.
        self._updateAttributes(page_attributes)
Example #15
0
    def createMaterialize(self, parent, token, page):
        """Render folder-binned content as headed, three-column link lists."""
        binned = self.extension.binContent(page, token['location'], ContentExtension.FOLDER)

        for head in sorted(binned.keys()):
            entries = binned[head]
            if head:
                # Section heading (skipped for the unnamed/root bin)
                html.Tag(parent, 'h{:d}'.format(int(token['level'])),
                         class_='moose-a-to-z',
                         string=unicode(head))

            row = html.Tag(parent, 'div', class_='row')
            for group in mooseutils.make_chunks(list(entries), 3):
                column = html.Tag(row, 'div', class_='col s12 m6 l4')
                ul = html.Tag(column, 'ul', class_='moose-a-to-z')
                for text, path, _ in group:
                    item = html.Tag(ul, 'li')
                    html.Tag(item, 'a', href=path, string=unicode(text.replace('.md', '')))
Example #16
0
    def execute(self, nodes, num_threads=1):
        """Perform the translation with multiprocessing."""
        # Never spawn more workers than there are nodes to process
        num_threads = min(num_threads, len(nodes))

        # Python 2 lacks multiprocessing.Barrier; use the mooseutils backport
        if sys.version_info[0] == 2:
            barrier = mooseutils.parallel.Barrier(num_threads)
        else:
            barrier = multiprocessing.Barrier(num_threads)

        workers = []
        for chunk in mooseutils.make_chunks(nodes, num_threads):
            worker = multiprocessing.Process(target=self.__target,
                                             args=(chunk, barrier))
            workers.append(worker)
            worker.start()

        for worker in workers:
            worker.join()
Example #17
0
    def createHTMLHelper(self, parent, token, page):
        """Render letter-binned content with 0-9/a-z jump buttons."""
        # Ensure every letter/digit has a bin so each gets a button
        binned = self.extension.binContent(page, token['location'],
                                           ContentExtension.LETTER)
        for letter in '0123456789abcdefghijklmnopqrstuvwxyz':
            if letter not in binned:
                binned[letter] = set()

        # Button bar; detach it entirely when the 'buttons' setting is off
        buttons = html.Tag(parent, 'div', class_='moose-a-to-z-buttons')
        if not token['buttons']:
            buttons.parent = None

        for letter in sorted(binned.keys()):
            entries = binned[letter]
            anchor = uuid.uuid4()
            btn = html.Tag(buttons, 'a',
                           string=str(letter.upper()),
                           class_='btn moose-a-to-z-button',
                           href='#{}'.format(anchor))

            # Empty bins get a disabled button and no section
            if not entries:
                btn.addClass('disabled')
                continue

            html.Tag(parent, 'h{:d}'.format(int(token['level'])),
                     class_='moose-a-to-z',
                     id_=str(anchor),
                     string=str(letter))

            row = html.Tag(parent, 'div', class_='row')
            for group in mooseutils.make_chunks(list(entries), 3):
                column = html.Tag(row, 'div', class_='col s12 m6 l4')
                ul = html.Tag(column, 'ul', class_='moose-a-to-z')
                for text, path, _ in group:
                    item = html.Tag(ul, 'li')
                    html.Tag(item, 'a', href=path, string=str(text))
Example #18
0
    def createMaterialize(self, parent, token, page):
        """Render pages under token['location'] grouped by first-level folder."""
        location = token['location']
        is_source = lambda p: p.local.startswith(location) and isinstance(
            p, pages.Source)
        nodes = self.translator.findPages(is_source)
        nodes.sort(key=lambda n: n.local)

        # Group pages by the first path component below 'location'; pages
        # directly inside it fall into the unnamed (u'') group
        groups = collections.defaultdict(list)
        for node in nodes:
            parts = tuple(
                node.local.replace(location, '').strip(os.sep).split(os.sep))
            group = parts[0] if len(parts) > 1 else u''
            groups[group].append((node.name, node.relativeDestination(page)))

        # Build lists, one headed section per group in sorted order
        for head, items in sorted(groups.iteritems(), key=lambda h: h[0]):

            if head:
                html.Tag(parent,
                         'h{}'.format(token['level']),
                         class_='moose-a-to-z',
                         string=unicode(head))

            row = html.Tag(parent, 'div', class_='row')

            for group in mooseutils.make_chunks(items, 3):
                column = html.Tag(row, 'div', class_='col s12 m6 l4')
                ul = html.Tag(column, 'ul', class_='moose-a-to-z')
                for text, href in group:
                    item = html.Tag(ul, 'li')
                    html.Tag(item, 'a', href=href,
                             string=unicode(text.replace('.md', '')))
Example #19
0
    def build(self, num_threads=multiprocessing.cpu_count()):
        """Build every page of the tree in parallel, then copy support files."""
        if self._root is None:
            raise mooseutils.MooseException("The 'init' method must be called prior to build.")

        # The configuration must have produced a proper tree before building
        if not isinstance(self._root, anytree.NodeMixin):
            raise TypeError("The 'buildNodes' method must return a anytree.NodeMixin object.")

        # Fan the pages out across worker processes
        workers = []
        for chunk in mooseutils.make_chunks(list(self), num_threads):
            worker = multiprocessing.Process(target=self.buildPages, args=(chunk,))
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        self.copyFiles()
Example #20
0
    def createMaterialize(self, parent, token, page):
        """Render letter-binned content with a-z jump buttons."""
        # Ensure every letter has a bin so each letter gets a button
        binned = self.extension.binContent(page, token['location'], ContentExtension.LETTER)
        for letter in 'abcdefghijklmnopqrstuvwxyz':
            if letter not in binned:
                binned[letter] = set()

        # Button bar; detach it entirely when the 'buttons' setting is off
        buttons = html.Tag(parent, 'div', class_='moose-a-to-z-buttons')
        if not token['buttons']:
            buttons.parent = None

        for letter, entries in binned.iteritems():
            anchor = uuid.uuid4()
            btn = html.Tag(buttons, 'a',
                           string=unicode(letter.upper()),
                           class_='btn moose-a-to-z-button',
                           href='#{}'.format(anchor))

            # Empty bins get a disabled button and no section
            if not entries:
                btn.addClass('disabled')
                continue

            html.Tag(parent, 'h{:d}'.format(int(token['level'])),
                     class_='moose-a-to-z',
                     id_=unicode(anchor),
                     string=unicode(letter))

            row = html.Tag(parent, 'div', class_='row')
            for group in mooseutils.make_chunks(list(entries), 3):
                column = html.Tag(row, 'div', class_='col s12 m6 l4')
                ul = html.Tag(column, 'ul', class_='moose-a-to-z')
                for text, path, _ in group:
                    item = html.Tag(ul, 'li')
                    html.Tag(item, 'a', href=path, string=unicode(text))
Example #21
0
    def createMaterialize(self, parent, token, page):
        """Render folder-binned content as headed, three-column link lists."""
        binned = self.extension.binContent(page, token['location'],
                                           ContentExtension.FOLDER)

        for head, entries in binned.iteritems():
            if head:
                # Section heading (skipped for the unnamed/root bin)
                html.Tag(parent, 'h{:d}'.format(int(token['level'])),
                         class_='moose-a-to-z', string=unicode(head))

            row = html.Tag(parent, 'div', class_='row')
            for group in mooseutils.make_chunks(list(entries), 3):
                column = html.Tag(row, 'div', class_='col s12 m6 l4')
                ul = html.Tag(column, 'ul', class_='moose-a-to-z')
                for text, path, _ in group:
                    item = html.Tag(ul, 'li')
                    html.Tag(item, 'a', href=path,
                             string=unicode(text.replace('.md', '')))
Example #22
0
    def execute(self, num_threads=1):
        """
        Perform parallel build for all pages.

        Inputs:
            num_threads[int]: The number of threads to use (default: 1).

        NOTICE:
        A proper parallelization for MooseDocs would be three parallel steps, with minimal
        communication.
          1. Read all the markdown files (in parallel).
          2. Perform the AST tokenization (in parallel), then communicate the completed
             AST back to the main process.
          3. Convert the AST to HTML (in parallel).
          4. Write/copy (in parallel) the completed HTML and other files (images, js, etc.).

        However, step two is problematic because python requires that the AST be pickled,
        which is possible, for communication. In doing this I realized that the pickling was a
        limiting factor and made the AST step very slow. I need to investigate this further to
        make sure I was using a non-locking pool of workers, but this was taking too much
        development time.

        The current implementation performs all four steps together, which generally works just
        fine, with one exception. The autolink extension actually interrogates the AST from other
        pages. Hence, if the other page was generated off process the information is not available.
        The current implementation will just compute the AST locally (i.e., I am performing repeated
        calculations in favor of communication). This works well enough for now, but as more
        autolinking is preformed and other similar extensions are created this could cause a slow
        down.

        Long term this should be looked into again, for now the current approach is working well.
        This new system is already an order of 4 times faster than the previous implementation and
        likely could be optimized further.

        The multiprocessing.Manager() needs to be explored, it is working to pull the JSON index
        information together.
        """
        common.check_type('num_threads', num_threads, int)
        self.__assertInitialize()

        # Log start message and time
        LOG.info("Building Pages...")
        start = time.time()

        # Manager-backed list so index entries survive across process boundaries
        manager = multiprocessing.Manager()
        array = manager.list()
        def target(nodes, lock):
            """Helper for building multiple nodes (i.e., a chunk for a process)."""
            for node in nodes:
                node.build()
                if isinstance(node, page.MarkdownNode):
                    node.buildIndex(self.renderer.get('home', None))
                    # Lock guards concurrent appends to the shared index list
                    with lock:
                        for entry in node.index:
                            array.append(entry)

        # Complete list of nodes
        nodes = [n for n in anytree.PreOrderIter(self.root)]

        # Serial
        if num_threads == 1:
            target(nodes, self.lock)

        # Multiprocessing
        else:
            jobs = []
            for chunk in mooseutils.make_chunks(nodes, num_threads):
                p = multiprocessing.Process(target=target, args=(chunk, self.lock))
                p.start()
                jobs.append(p)

            for job in jobs:
                job.join()

        # Done
        stop = time.time()
        LOG.info("Build time %s sec.", stop - start)

        # Write the collected search index as a javascript data file
        iname = os.path.join(self.destination, 'js', 'search_index.js')
        if not os.path.isdir(os.path.dirname(iname)):
            os.makedirs(os.path.dirname(iname))
        items = [v for v in array if v]
        common.write(iname, 'var index_data = {};'.format(json.dumps(items)))
Example #23
0
 def assertChunk(self, n, gold):
     """Assert that chunking self.data into n pieces yields *gold*."""
     self.assertEqual(list(mooseutils.make_chunks(self.data, n)), gold)
Example #24
0
    def execute(self, num_threads=1):
        """
        Perform parallel build for all pages.

        Inputs:
            num_threads[int]: The number of threads to use (default: 1).

        NOTICE:
        A proper parallelization for MooseDocs would be three parallel steps, with minimal
        communication.
          1. Read all the markdown files (in parallel).
          2. Perform the AST tokenization (in parallel), then communicate the completed
             AST back to the main process.
          3. Convert the AST to HTML (in parallel).
          4. Write/copy (in parallel) the completed HTML and other files (images, js, etc.).

        However, step two is problematic because python requires that the AST be pickled,
        which is possible, for communication. In doing this I realized that the pickling was a
        limiting factor and made the AST step very slow. I need to investigate this further to
        make sure I was using a non-locking pool of workers, but this was taking too much
        development time.

        The current implementation performs all four steps together, which generally works just
        fine, with one exception. The autolink extension actually interrogates the AST from other
        pages. Hence, if the other page was generated off process the information is not available.
        The current implementation will just compute the AST locally (i.e., I am performing repeated
        calculations in favor of communication). This works well enough for now, but as more
        autolinking is preformed and other similar extensions are created this could cause a slow
        down.

        Long term this should be looked into again, for now the current approach is working well.
        This new system is already an order of 4 times faster than the previous implementation and
        likely could be optimized further.

        The multiprocessing.Manager() needs to be explored, it is working to pull the JSON index
        information together.
        """
        common.check_type('num_threads', num_threads, int)
        self.__assertInitialize()

        self.renderer.preExecute()

        # Log start message and time
        LOG.info("Building Pages...")
        start = time.time()

        # Manager-backed list so index entries survive across process boundaries;
        # the search index is only needed for the Materialize renderer
        manager = multiprocessing.Manager()
        array = manager.list()
        build_index = isinstance(self.renderer, MaterializeRenderer)

        def target(nodes, lock):
            """Helper for building multiple nodes (i.e., a chunk for a process)."""
            for node in nodes:
                node.build()
                if isinstance(node, page.MarkdownNode):
                    if build_index:
                        node.buildIndex(self.renderer.get('home', None))
                        # Lock guards concurrent appends to the shared index list
                        with lock:
                            for entry in node.index:
                                array.append(entry)

        # Complete list of nodes
        nodes = [n for n in anytree.PreOrderIter(self.root)]

        # Serial
        if num_threads == 1:
            target(nodes, self.lock)

        # Multiprocessing
        else:
            jobs = []
            for chunk in mooseutils.make_chunks(nodes, num_threads):
                p = multiprocessing.Process(target=target,
                                            args=(chunk, self.lock))
                p.start()
                jobs.append(p)

            for job in jobs:
                job.join()

        # Done
        stop = time.time()
        LOG.info("Build time %s sec.", stop - start)

        # Write the collected search index as a javascript data file
        if build_index:
            iname = os.path.join(self.get('destination'), 'js',
                                 'search_index.js')
            if not os.path.isdir(os.path.dirname(iname)):
                os.makedirs(os.path.dirname(iname))
            items = [v for v in array if v]
            common.write(iname,
                         'var index_data = {};'.format(json.dumps(items)))

        self.renderer.postExecute()
Example #25
0
 def assertChunk(self, n, gold):
     """Assert that chunking self.data into n pieces yields *gold*."""
     self.assertEqual(list(mooseutils.make_chunks(self.data, n)), gold)