Esempio n. 1
0
def _build_html_for_file_ids(tree, start, end):
    """Write HTML files for file IDs from ``start`` to ``end``, inclusive.

    Return None if all goes well, or a tuple of (stringified exception, exc
    type, exc value, file ID, file path) if something goes wrong while
    htmlifying a file. If the failure happens before the first file is
    fetched, the file ID is -1 and the path is ``'(no file yet)'``.

    This is the top-level function of an HTML worker process. Log progress to a
    file named "build-html-<start>-<end>.log".

    :arg tree: The tree whose files are htmlified; its ``target_folder`` is
        where the ``.html`` files are written
    :arg start: The lowest file ID to process
    :arg end: The highest file ID to process

    """
    path = '(no file yet)'
    file_id = -1
    # The loop below only binds its counter if the query yields at least one
    # row. Start from 0 so an empty ID range is reported as "0 files" rather
    # than blowing up with a NameError that gets returned as a bogus failure.
    num_files = 0
    try:
        # We might as well have this write its log directly rather than returning
        # them to the master process, since it's already writing the built HTML
        # directly, since that probably yields better parallelism.

        conn = connect_database(tree)
        try:
            # TODO: Replace this ad hoc logging with the logging module (or
            # something more humane) so we can get some automatic timestamps.
            # If we get timestamps spit out in the parent process, we don't
            # need any of the timing or counting code here.
            with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log:
                # Load htmlifier plugins:
                plugins = load_htmlifiers(tree)
                for plugin in plugins:
                    plugin.load(tree, conn)

                start_time = datetime.now()

                # Fetch and htmlify each document:
                rows = conn.execute("""
                                    SELECT files.id, path, icon, trg_index.text
                                    FROM trg_index, files
                                    WHERE trg_index.id = files.id
                                    AND trg_index.id >= ?
                                    AND trg_index.id <= ?
                                    """,
                                    [start, end])
                for num_files, (file_id, path, icon, text) in enumerate(rows, 1):
                    dst_path = os.path.join(tree.target_folder, path + '.html')
                    log.write('Starting %s.\n' % path)
                    htmlify(tree, conn, icon, path, text, dst_path, plugins)

                conn.commit()

                # Write time information:
                elapsed = datetime.now() - start_time
                log.write('Finished %s files in %s.\n' % (num_files, elapsed))
        finally:
            # Release the connection on failure as well as on success. An
            # error raised by close() itself is still reported through the
            # handler below.
            conn.close()
    except Exception:
        # Take only the type and value: binding the traceback to a local in
        # the frame that handles the exception creates a reference cycle.
        exc_type, exc_value = exc_info()[:2]
        return format_exc(), exc_type, exc_value, file_id, path
Esempio n. 2
0
def _build_html_for_file_ids(tree, start, end):
    """Write HTML files for file IDs from ``start`` to ``end``, inclusive.

    Return None if all goes well, or a tuple of (stringified exception, exc
    type, exc value, file ID, file path) if something goes wrong while
    htmlifying a file. When nothing has been fetched yet at the time of the
    failure, the file ID is -1 and the path is ``'(no file yet)'``.

    This is the top-level function of an HTML worker process. Log progress to a
    file named "build-html-<start>-<end>.log".

    :arg tree: The tree being built; HTML is written under its
        ``target_folder``
    :arg start: The first file ID of the range
    :arg end: The last file ID of the range

    """
    path = '(no file yet)'
    file_id = -1
    # enumerate() never assigns the counter when the query returns no rows, so
    # give it a value up front; otherwise the final log line raises NameError
    # and an empty range is wrongly reported to the caller as an error.
    num_files = 0
    try:
        # We might as well have this write its log directly rather than returning
        # them to the master process, since it's already writing the built HTML
        # directly, since that probably yields better parallelism.

        conn = connect_database(tree)
        try:
            # TODO: Replace this ad hoc logging with the logging module (or
            # something more humane) so we can get some automatic timestamps.
            # If we get timestamps spit out in the parent process, we don't
            # need any of the timing or counting code here.
            with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log:
                # Load htmlifier plugins:
                plugins = load_htmlifiers(tree)
                for plugin in plugins:
                    plugin.load(tree, conn)

                start_time = datetime.now()

                # Fetch and htmlify each document:
                cursor = conn.execute(
                    """
                             SELECT files.id, path, icon, trg_index.text
                             FROM trg_index, files
                             WHERE trg_index.id = files.id
                             AND trg_index.id >= ?
                             AND trg_index.id <= ?
                             """, [start, end])
                for num_files, (file_id, path, icon, text) in enumerate(cursor, 1):
                    dst_path = os.path.join(tree.target_folder, path + '.html')
                    log.write('Starting %s.\n' % path)
                    htmlify(tree, conn, icon, path, text, dst_path, plugins)

                conn.commit()

                # Write time information:
                duration = datetime.now() - start_time
                log.write('Finished %s files in %s.\n' % (num_files, duration))
        finally:
            # Always give the connection back, even when htmlifying failed.
            # Should close() itself raise, the outer handler still turns that
            # into the usual error tuple.
            conn.close()
    except Exception:
        # Deliberately drop the traceback object: holding it in a local of the
        # handling frame would create a reference cycle.
        exc_type, exc_value = exc_info()[:2]
        return format_exc(), exc_type, exc_value, file_id, path
Esempio n. 3
0
def build_instance(config_path, nb_jobs=None, tree=None, verbose=False):
    """Build a DXR instance.

    :arg config_path: The path to a config file
    :arg nb_jobs: The number of parallel jobs to pass into ``make``. Defaults
        to whatever the config file says.
    :arg tree: A single tree to build. Defaults to all the trees in the config
        file.

    """
    # Load configuration file
    # (this will abort on inconsistencies)
    overrides = {}
    if nb_jobs:
        # TODO: Remove this brain-dead cast when we get the types right in the
        # Config object:
        overrides['nb_jobs'] = str(nb_jobs)
    config = Config(config_path, **overrides)

    # Find trees to make, fail if requested tree isn't available
    if tree:
        trees = [t for t in config.trees if t.name == tree]
        if len(trees) == 0:
            print >> sys.stderr, "Tree '%s' is not defined in config file!" % tree
            sys.exit(1)
    else:
        # Build everything if no tree is provided
        trees = config.trees

    # Create config.target_folder (if not exists)
    print "Generating target folder"
    ensure_folder(config.target_folder, False)
    ensure_folder(config.temp_folder, True)
    ensure_folder(config.log_folder, True)

    jinja_env = load_template_env(config.temp_folder, config.template_folder)

    # We don't want to load config file on the server, so we just write all the
    # setting into the config.py script, simple as that.
    _fill_and_write_template(
        jinja_env,
        'config.py.jinja',
        os.path.join(config.target_folder, 'config.py'),
        dict(trees=repr([t.name for t in config.trees]),
             wwwroot=repr(config.wwwroot),
             template_parameters=repr(config.template_parameters),
             generated_date=repr(config.generated_date),
             directory_index=repr(config.directory_index)))

    # Create jinja cache folder in target folder
    ensure_folder(os.path.join(config.target_folder, 'jinja_dxr_cache'))

    # Build root-level index.html:
    ensure_folder(os.path.join(config.target_folder, 'trees'))
    _fill_and_write_template(
        jinja_env,
        'index.html',
        os.path.join(config.target_folder, 'trees', 'index.html'),
        {'wwwroot': config.wwwroot,
          'tree': config.trees[0].name,
          'trees': [t.name for t in config.trees],
          'config': config.template_parameters,
          'generated_date': config.generated_date})
    # TODO Make open-search.xml things (or make the server so it can do them!)

    # Build trees requested
    for tree in trees:
        # Note starting time
        start_time = datetime.now()

        # Create folders (delete if exists)
        ensure_folder(tree.target_folder, True) # <config.target_folder>/<tree.name>
        ensure_folder(tree.object_folder,       # Object folder (user defined!)
            tree.source_folder != tree.object_folder) # Only clean if not the srcdir
        ensure_folder(tree.temp_folder,   True) # <config.temp_folder>/<tree.name>
                                                # (or user defined)
        ensure_folder(tree.log_folder,    True) # <config.log_folder>/<tree.name>
                                                # (or user defined)
        # Temporary folders for plugins
        ensure_folder(os.path.join(tree.temp_folder, 'plugins'), True)
        for plugin in tree.enabled_plugins:     # <tree.config>/plugins/<plugin>
            ensure_folder(os.path.join(tree.temp_folder, 'plugins', plugin), True)

        # Connect to database (exits on failure: sqlite_version, tokenizer, etc)
        conn = connect_database(tree)

        # Create database tables
        create_tables(tree, conn)

        # Index all source files (for full text search)
        # Also build all folder listing while we're at it
        index_files(tree, conn)

        # Build tree
        build_tree(tree, conn, verbose)

        # Optimize and run integrity check on database
        finalize_database(conn)

        # Commit database
        conn.commit()

        # Build html
        run_html_workers(tree, conn)

        # Close connection
        conn.commit()
        conn.close()

        # Save the tree finish time
        delta = datetime.now() - start_time
        print "(finished building '%s' in %s)" % (tree.name, delta)
Esempio n. 4
0
def build_instance(config_path, nb_jobs=None, tree=None, verbose=False):
    """Build a DXR instance.

    :arg config_path: The path to a config file
    :arg nb_jobs: The number of parallel jobs to pass into ``make``. Defaults
        to whatever the config file says.
    :arg tree: A single tree to build. Defaults to all the trees in the config
        file.

    """
    # Load configuration file
    # (this will abort on inconsistencies)
    overrides = {}
    if nb_jobs:
        # TODO: Remove this brain-dead cast when we get the types right in the
        # Config object:
        overrides['nb_jobs'] = str(nb_jobs)
    config = Config(config_path, **overrides)

    # Find trees to make, fail if requested tree isn't available
    if tree:
        trees = [t for t in config.trees if t.name == tree]
        if len(trees) == 0:
            print >> sys.stderr, "Tree '%s' is not defined in config file!" % tree
            sys.exit(1)
    else:
        # Build everything if no tree is provided
        trees = config.trees

    # Create config.target_folder (if not exists)
    print "Generating target folder"
    ensure_folder(config.target_folder, False)
    ensure_folder(config.temp_folder, True)
    ensure_folder(config.log_folder, True)

    jinja_env = load_template_env(config.temp_folder, config.template_folder)

    # We don't want to load config file on the server, so we just write all the
    # setting into the config.py script, simple as that.
    _fill_and_write_template(
        jinja_env, 'config.py.jinja',
        os.path.join(config.target_folder, 'config.py'),
        dict(trees=repr([t.name for t in config.trees]),
             wwwroot=repr(config.wwwroot),
             template_parameters=repr(config.template_parameters),
             generated_date=repr(config.generated_date),
             directory_index=repr(config.directory_index)))

    # Create jinja cache folder in target folder
    ensure_folder(os.path.join(config.target_folder, 'jinja_dxr_cache'))

    # Build root-level index.html:
    ensure_folder(os.path.join(config.target_folder, 'trees'))
    _fill_and_write_template(
        jinja_env, 'index.html',
        os.path.join(config.target_folder, 'trees', 'index.html'), {
            'wwwroot': config.wwwroot,
            'tree': config.trees[0].name,
            'trees': [t.name for t in config.trees],
            'config': config.template_parameters,
            'generated_date': config.generated_date
        })
    # TODO Make open-search.xml things (or make the server so it can do them!)

    # Build trees requested
    for tree in trees:
        # Note starting time
        start_time = datetime.now()

        # Create folders (delete if exists)
        ensure_folder(tree.target_folder,
                      True)  # <config.target_folder>/<tree.name>
        ensure_folder(
            tree.object_folder,  # Object folder (user defined!)
            tree.source_folder !=
            tree.object_folder)  # Only clean if not the srcdir
        ensure_folder(tree.temp_folder,
                      True)  # <config.temp_folder>/<tree.name>
        # (or user defined)
        ensure_folder(tree.log_folder, True)  # <config.log_folder>/<tree.name>
        # (or user defined)
        # Temporary folders for plugins
        ensure_folder(os.path.join(tree.temp_folder, 'plugins'), True)
        for plugin in tree.enabled_plugins:  # <tree.config>/plugins/<plugin>
            ensure_folder(os.path.join(tree.temp_folder, 'plugins', plugin),
                          True)

        # Connect to database (exits on failure: sqlite_version, tokenizer, etc)
        conn = connect_database(tree)

        # Create database tables
        create_tables(tree, conn)

        # Index all source files (for full text search)
        # Also build all folder listing while we're at it
        index_files(tree, conn)

        # Build tree
        build_tree(tree, conn, verbose)

        # Optimize and run integrity check on database
        finalize_database(conn)

        # Commit database
        conn.commit()

        # Build html
        run_html_workers(tree, conn)

        # Close connection
        conn.commit()
        conn.close()

        # Save the tree finish time
        delta = datetime.now() - start_time
        print "(finished building '%s' in %s)" % (tree.name, delta)