Example 1
    def postExecute(self, content):
        """Build the JSON file containing the index data."""
        dest = self.translator.get('destination') #pylint: disable=no-member
        home = self.get('home')
        iname = os.path.join(dest, 'js', 'search_index.js')
        items = []

        for page in content:
            meta = self.translator.getMetaData(page, 'search')
            if meta is None:
                continue
            location = page.destination.replace(dest, home)
            for data in meta:
                url = '{}#{}'.format(location, data['bookmark'])
                items.append(dict(title=data['title'], text=data['text'], location=url))

        if not os.path.isdir(os.path.dirname(iname)):
            os.makedirs(os.path.dirname(iname))
        common.write(iname, 'var index_data = {};'.format(json.dumps(items)))
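
For reference, the sketch below reproduces just the output step of the method above in a self-contained form. The sample entry and the 'site' destination directory are hypothetical stand-ins; only the generated file format (a JavaScript file assigning a JSON array to index_data) matches the snippet.

    import json
    import os

    # Hypothetical index entries, mirroring the dicts built in the loop above.
    items = [
        {'title': 'Getting Started', 'text': 'Install the framework...',
         'location': '/getting_started.html#install'},
    ]

    # Equivalent of the directory check and the common.write() call above.
    iname = os.path.join('site', 'js', 'search_index.js')
    if not os.path.isdir(os.path.dirname(iname)):
        os.makedirs(os.path.dirname(iname))
    with open(iname, 'w') as fid:
        fid.write('var index_data = {};'.format(json.dumps(items)))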
Example 2
    def execute(self, num_threads=1):
        """
        Perform parallel build for all pages.

        Inputs:
            num_threads[int]: The number of worker processes to use (default: 1).

        NOTICE:
        A proper parallelization for MooseDocs would be four parallel steps, with minimal
        communication:
          1. Read all the markdown files (in parallel).
          2. Perform the AST tokenization (in parallel), then communicate the completed
             AST back to the main process.
          3. Convert the AST to HTML (in parallel).
          4. Write/copy (in parallel) the completed HTML and other files (images, js, etc.).

        However, step two is problematic because Python requires that the AST be pickled for
        inter-process communication. This is possible, but in doing it I realized that the
        pickling was a limiting factor and made the AST step very slow. I need to investigate
        this further to make sure I was using a non-locking pool of workers, but that was
        taking too much development time.

        The current implementation performs all four steps together, which generally works
        just fine, with one exception: the autolink extension interrogates the AST from other
        pages, so if the other page was generated in a different process that information is
        not available. The current implementation simply computes the AST locally (i.e.,
        repeated calculations are performed to avoid communication). This works well enough
        for now, but as more autolinking is performed and other similar extensions are
        created, this could cause a slowdown.

        Long term this should be revisited; for now the current approach is working well. The
        new system is already roughly four times faster than the previous implementation and
        could likely be optimized further.

        The use of multiprocessing.Manager() needs to be explored further; at present it
        works to pull the JSON index information together.
        """
        common.check_type('num_threads', num_threads, int)
        self.__assertInitialize()

        # Log start message and time
        LOG.info("Building Pages...")
        start = time.time()

        manager = multiprocessing.Manager()
        array = manager.list()
        def target(nodes, lock):
            """Helper for building multiple nodes (i.e., a chunk for a process)."""
            for node in nodes:
                node.build()
                if isinstance(node, page.MarkdownNode):
                    node.buildIndex(self.renderer.get('home', None))
                    with lock:
                        for entry in node.index:
                            array.append(entry)

        # Complete list of nodes
        nodes = [n for n in anytree.PreOrderIter(self.root)]

        # Serial
        if num_threads == 1:
            target(nodes, self.lock)

        # Multiprocessing
        else:
            jobs = []
            for chunk in mooseutils.make_chunks(nodes, num_threads):
                p = multiprocessing.Process(target=target, args=(chunk, self.lock))
                p.start()
                jobs.append(p)

            for job in jobs:
                job.join()

        # Done
        stop = time.time()
        LOG.info("Build time %s sec.", stop - start)

        iname = os.path.join(self.destination, 'js', 'search_index.js')
        if not os.path.isdir(os.path.dirname(iname)):
            os.makedirs(os.path.dirname(iname))
        items = [v for v in array if v]
        common.write(iname, 'var index_data = {};'.format(json.dumps(items)))
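
The worker pattern above (several multiprocessing.Process workers appending to a shared manager.list() under a lock) can be distilled into the minimal, runnable sketch below. The squaring "work" and the hard-coded chunks are hypothetical stand-ins for node.build() and make_chunks(); the aggregation mechanics match the snippet.

    import multiprocessing

    def target(chunk, lock, array):
        """Worker: process a chunk of values and append results to the shared list."""
        for value in chunk:
            result = value * value  # stand-in for node.build()/node.buildIndex()
            with lock:
                array.append(result)

    if __name__ == '__main__':
        manager = multiprocessing.Manager()
        array = manager.list()  # shared, process-safe list proxy
        lock = multiprocessing.Lock()

        chunks = [[0, 1, 2], [3, 4, 5], [6, 7]]  # stand-in for make_chunks(nodes, n)
        jobs = [multiprocessing.Process(target=target, args=(chunk, lock, array))
                for chunk in chunks]
        for job in jobs:
            job.start()
        for job in jobs:
            job.join()

        print(sorted(array))  # [0, 1, 4, 9, 16, 25, 36, 49]

Note that a manager.list() proxy already serializes individual append() calls; the explicit lock mirrors the original code, where it also keeps each node's index entries together in the shared list.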
Example 3
    def execute(self, num_threads=1):
        """
        Perform parallel build for all pages.

        Inputs:
            num_threads[int]: The number of worker processes to use (default: 1).

        NOTICE:
        A proper parallelization for MooseDocs would be four parallel steps, with minimal
        communication:
          1. Read all the markdown files (in parallel).
          2. Perform the AST tokenization (in parallel), then communicate the completed
             AST back to the main process.
          3. Convert the AST to HTML (in parallel).
          4. Write/copy (in parallel) the completed HTML and other files (images, js, etc.).

        However, step two is problematic because Python requires that the AST be pickled for
        inter-process communication. This is possible, but in doing it I realized that the
        pickling was a limiting factor and made the AST step very slow. I need to investigate
        this further to make sure I was using a non-locking pool of workers, but that was
        taking too much development time.

        The current implementation performs all four steps together, which generally works
        just fine, with one exception: the autolink extension interrogates the AST from other
        pages, so if the other page was generated in a different process that information is
        not available. The current implementation simply computes the AST locally (i.e.,
        repeated calculations are performed to avoid communication). This works well enough
        for now, but as more autolinking is performed and other similar extensions are
        created, this could cause a slowdown.

        Long term this should be revisited; for now the current approach is working well. The
        new system is already roughly four times faster than the previous implementation and
        could likely be optimized further.

        The use of multiprocessing.Manager() needs to be explored further; at present it
        works to pull the JSON index information together.
        """
        common.check_type('num_threads', num_threads, int)
        self.__assertInitialize()

        self.renderer.preExecute()

        # Log start message and time
        LOG.info("Building Pages...")
        start = time.time()

        manager = multiprocessing.Manager()
        array = manager.list()
        build_index = isinstance(self.renderer, MaterializeRenderer)

        def target(nodes, lock):
            """Helper for building multiple nodes (i.e., a chunk for a process)."""
            for node in nodes:
                node.build()
                if isinstance(node, page.MarkdownNode):
                    if build_index:
                        node.buildIndex(self.renderer.get('home', None))
                        with lock:
                            for entry in node.index:
                                array.append(entry)

        # Complete list of nodes
        nodes = [n for n in anytree.PreOrderIter(self.root)]

        # Serial
        if num_threads == 1:
            target(nodes, self.lock)

        # Multiprocessing
        else:
            jobs = []
            for chunk in mooseutils.make_chunks(nodes, num_threads):
                p = multiprocessing.Process(target=target,
                                            args=(chunk, self.lock))
                p.start()
                jobs.append(p)

            for job in jobs:
                job.join()

        # Done
        stop = time.time()
        LOG.info("Build time %s sec.", stop - start)

        if build_index:
            iname = os.path.join(self.get('destination'), 'js',
                                 'search_index.js')
            if not os.path.isdir(os.path.dirname(iname)):
                os.makedirs(os.path.dirname(iname))
            items = [v for v in array if v]
            common.write(iname,
                         'var index_data = {};'.format(json.dumps(items)))

        self.renderer.postExecute()
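
Both examples hand work to the processes via mooseutils.make_chunks, whose implementation is not shown on this page. A plausible version, assuming it splits a list into at most num roughly equal contiguous pieces, would be:

    def make_chunks(items, num):
        """Yield up to num roughly equal-sized chunks of items (assumed behavior)."""
        num = min(num, len(items)) or 1
        size, remainder = divmod(len(items), num)
        start = 0
        for i in range(num):
            # The first `remainder` chunks take one extra item each.
            stop = start + size + (1 if i < remainder else 0)
            yield items[start:stop]
            start = stop

For example, list(make_chunks(list(range(8)), 3)) yields [[0, 1, 2], [3, 4, 5], [6, 7]], so each multiprocessing.Process receives one contiguous slice of the page nodes.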