def _build_html_for_file_ids(tree, start, end):
    """Write HTML files for file IDs from ``start`` to ``end``, inclusive.

    Return None if all goes well, a tuple of (stringified exception, exc type,
    exc value, file ID, file path) if something goes wrong while htmlifying a
    file. If the failure happens before any row has been fetched, the file ID
    is -1 and the file path is ``'(no file yet)'``.

    This is the top-level function of an HTML worker process. Log progress to a
    file named "build-html-<start>-<end>.log".

    :arg tree: The tree whose files are htmlified; handed to
        ``connect_database``, ``open_log``, ``load_htmlifiers`` and ``htmlify``
    :arg start: The lowest file ID to process
    :arg end: The highest file ID to process

    """
    path = '(no file yet)'
    file_id = -1
    try:
        # We might as well have this write its log directly rather than
        # returning them to the master process, since it's already writing the
        # built HTML directly, since that probably yields better parallelism.
        conn = connect_database(tree)
        try:
            # TODO: Replace this ad hoc logging with the logging module (or
            # something more humane) so we can get some automatic timestamps.
            # If we get timestamps spit out in the parent process, we don't
            # need any of the timing or counting code here.
            with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log:
                # Load htmlifier plugins:
                plugins = load_htmlifiers(tree)
                for plugin in plugins:
                    plugin.load(tree, conn)

                start_time = datetime.now()

                # An ID range that matches no rows never binds the loop
                # target, so give the counter a value up front.
                num_files = 0

                # Fetch and htmlify each document:
                rows = conn.execute(
                    """
                    SELECT files.id, path, icon, trg_index.text
                    FROM trg_index, files
                    WHERE trg_index.id = files.id
                    AND trg_index.id >= ?
                    AND trg_index.id <= ?
                    """,
                    [start, end])
                for num_files, (file_id, path, icon, text) in enumerate(rows, 1):
                    dst_path = os.path.join(tree.target_folder, path + '.html')
                    log.write('Starting %s.\n' % path)
                    htmlify(tree, conn, icon, path, text, dst_path, plugins)

                conn.commit()

                # Write time information:
                elapsed = datetime.now() - start_time
                log.write('Finished %s files in %s.\n' % (num_files, elapsed))
        finally:
            # Release the connection on the failure path as well. This sits
            # inside the outer ``try`` so an error from close() is still
            # reported through the return value rather than raised.
            conn.close()
    except Exception:
        # Take only the type and value: keeping the traceback object in a
        # local of the frame that is handling the exception creates a
        # reference cycle.
        exc_type, exc_value = exc_info()[:2]
        return format_exc(), exc_type, exc_value, file_id, path
def _build_html_for_file_ids(tree, start, end):
    """Write HTML files for file IDs from ``start`` to ``end``, inclusive.

    Return None if all goes well, a tuple of (stringified exception, exc type,
    exc value, file ID, file path) if something goes wrong while htmlifying a
    file. If the failure happens before any row has been fetched, the file ID
    is -1 and the file path is ``'(no file yet)'``.

    This is the top-level function of an HTML worker process. Log progress to a
    file named "build-html-<start>-<end>.log".

    :arg tree: The tree whose files are htmlified; handed to
        ``connect_database``, ``open_log``, ``load_htmlifiers`` and ``htmlify``
    :arg start: The lowest file ID to process
    :arg end: The highest file ID to process

    """
    path = '(no file yet)'
    file_id = -1
    try:
        # We might as well have this write its log directly rather than
        # returning them to the master process, since it's already writing the
        # built HTML directly, since that probably yields better parallelism.
        conn = connect_database(tree)
        try:
            # TODO: Replace this ad hoc logging with the logging module (or
            # something more humane) so we can get some automatic timestamps.
            # If we get timestamps spit out in the parent process, we don't
            # need any of the timing or counting code here.
            with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log:
                # Load htmlifier plugins:
                plugins = load_htmlifiers(tree)
                for plugin in plugins:
                    plugin.load(tree, conn)

                start_time = datetime.now()

                # An ID range that matches no rows never binds the loop
                # target, so give the counter a value up front.
                num_files = 0

                # Fetch and htmlify each document:
                rows = conn.execute(
                    """
                    SELECT files.id, path, icon, trg_index.text
                    FROM trg_index, files
                    WHERE trg_index.id = files.id
                    AND trg_index.id >= ?
                    AND trg_index.id <= ?
                    """,
                    [start, end])
                for num_files, (file_id, path, icon, text) in enumerate(rows, 1):
                    dst_path = os.path.join(tree.target_folder, path + '.html')
                    log.write('Starting %s.\n' % path)
                    htmlify(tree, conn, icon, path, text, dst_path, plugins)

                conn.commit()

                # Write time information:
                elapsed = datetime.now() - start_time
                log.write('Finished %s files in %s.\n' % (num_files, elapsed))
        finally:
            # Release the connection on the failure path as well. This sits
            # inside the outer ``try`` so an error from close() is still
            # reported through the return value rather than raised.
            conn.close()
    except Exception:
        # Take only the type and value: keeping the traceback object in a
        # local of the frame that is handling the exception creates a
        # reference cycle.
        exc_type, exc_value = exc_info()[:2]
        return format_exc(), exc_type, exc_value, file_id, path
def build_instance(config_path, nb_jobs=None, tree=None, verbose=False):
    """Build a DXR instance.

    :arg config_path: The path to a config file
    :arg nb_jobs: The number of parallel jobs to pass into ``make``. Defaults
        to whatever the config file says.
    :arg tree: A single tree to build. Defaults to all the trees in the config
        file.
    :arg verbose: Passed through unchanged to ``build_tree`` for each tree

    Exit the process with status 1 if ``tree`` names a tree that is not
    defined in the config file.

    """
    # Load configuration file
    # (this will abort on inconsistencies)
    overrides = {}
    if nb_jobs:
        # TODO: Remove this brain-dead cast when we get the types right in the
        # Config object:
        overrides['nb_jobs'] = str(nb_jobs)
    config = Config(config_path, **overrides)

    # Find trees to make, fail if requested tree isn't available
    if tree:
        trees = [t for t in config.trees if t.name == tree]
        if not trees:
            # Same text and trailing newline as a print to stderr would emit.
            sys.stderr.write(
                "Tree '%s' is not defined in config file!" % tree + '\n')
            sys.exit(1)
    else:
        # Build everything if no tree is provided
        trees = config.trees

    # Create config.target_folder (if not exists)
    # (A parenthesised single argument prints the same text whether ``print``
    # is a statement or a function.)
    print("Generating target folder")
    ensure_folder(config.target_folder, False)
    ensure_folder(config.temp_folder, True)
    ensure_folder(config.log_folder, True)

    jinja_env = load_template_env(config.temp_folder, config.template_folder)

    # We don't want to load config file on the server, so we just write all the
    # setting into the config.py script, simple as that.
    _fill_and_write_template(
        jinja_env,
        'config.py.jinja',
        os.path.join(config.target_folder, 'config.py'),
        dict(trees=repr([t.name for t in config.trees]),
             wwwroot=repr(config.wwwroot),
             template_parameters=repr(config.template_parameters),
             generated_date=repr(config.generated_date),
             directory_index=repr(config.directory_index)))

    # Create jinja cache folder in target folder
    ensure_folder(os.path.join(config.target_folder, 'jinja_dxr_cache'))

    # Build root-level index.html:
    ensure_folder(os.path.join(config.target_folder, 'trees'))
    _fill_and_write_template(
        jinja_env,
        'index.html',
        os.path.join(config.target_folder, 'trees', 'index.html'),
        {'wwwroot': config.wwwroot,
         'tree': config.trees[0].name,
         'trees': [t.name for t in config.trees],
         'config': config.template_parameters,
         'generated_date': config.generated_date})
    # TODO Make open-search.xml things (or make the server so it can do them!)

    # Build trees requested
    for tree in trees:
        # Note starting time
        start_time = datetime.now()

        # Create folders (delete if exists)
        ensure_folder(tree.target_folder, True)  # <config.target_folder>/<tree.name>
        # Object folder (user defined!): only clean if not the srcdir
        ensure_folder(tree.object_folder,
                      tree.source_folder != tree.object_folder)
        ensure_folder(tree.temp_folder, True)    # <config.temp_folder>/<tree.name>
                                                 # (or user defined)
        ensure_folder(tree.log_folder, True)     # <config.log_folder>/<tree.name>
                                                 # (or user defined)
        # Temporary folders for plugins
        ensure_folder(os.path.join(tree.temp_folder, 'plugins'), True)
        for plugin in tree.enabled_plugins:      # <tree.config>/plugins/<plugin>
            ensure_folder(os.path.join(tree.temp_folder, 'plugins', plugin),
                          True)

        # Connect to database (exits on failure: sqlite_version, tokenizer, etc)
        conn = connect_database(tree)
        try:
            # Create database tables
            create_tables(tree, conn)

            # Index all source files (for full text search)
            # Also build all folder listing while we're at it
            index_files(tree, conn)

            # Build tree
            build_tree(tree, conn, verbose)

            # Optimize and run integrity check on database
            finalize_database(conn)

            # Commit database
            conn.commit()

            # Build html
            run_html_workers(tree, conn)

            conn.commit()
        finally:
            # Close connection, even when one of the build steps above raised,
            # so a failed build does not leave the connection open.
            conn.close()

        # Save the tree finish time
        delta = datetime.now() - start_time
        print("(finished building '%s' in %s)" % (tree.name, delta))
def build_instance(config_path, nb_jobs=None, tree=None, verbose=False):
    """Build a DXR instance.

    :arg config_path: The path to a config file
    :arg nb_jobs: The number of parallel jobs to pass into ``make``. Defaults
        to whatever the config file says.
    :arg tree: A single tree to build. Defaults to all the trees in the config
        file.
    :arg verbose: Passed through unchanged to ``build_tree`` for each tree

    Exit the process with status 1 if ``tree`` names a tree that is not
    defined in the config file.

    """
    # Load configuration file
    # (this will abort on inconsistencies)
    overrides = {}
    if nb_jobs:
        # TODO: Remove this brain-dead cast when we get the types right in the
        # Config object:
        overrides['nb_jobs'] = str(nb_jobs)
    config = Config(config_path, **overrides)

    # Find trees to make, fail if requested tree isn't available
    if tree:
        trees = [t for t in config.trees if t.name == tree]
        if not trees:
            # Same text and trailing newline as a print to stderr would emit.
            sys.stderr.write(
                "Tree '%s' is not defined in config file!" % tree + '\n')
            sys.exit(1)
    else:
        # Build everything if no tree is provided
        trees = config.trees

    # Create config.target_folder (if not exists)
    # (A parenthesised single argument prints the same text whether ``print``
    # is a statement or a function.)
    print("Generating target folder")
    ensure_folder(config.target_folder, False)
    ensure_folder(config.temp_folder, True)
    ensure_folder(config.log_folder, True)

    jinja_env = load_template_env(config.temp_folder, config.template_folder)

    # We don't want to load config file on the server, so we just write all the
    # setting into the config.py script, simple as that.
    _fill_and_write_template(
        jinja_env,
        'config.py.jinja',
        os.path.join(config.target_folder, 'config.py'),
        dict(trees=repr([t.name for t in config.trees]),
             wwwroot=repr(config.wwwroot),
             template_parameters=repr(config.template_parameters),
             generated_date=repr(config.generated_date),
             directory_index=repr(config.directory_index)))

    # Create jinja cache folder in target folder
    ensure_folder(os.path.join(config.target_folder, 'jinja_dxr_cache'))

    # Build root-level index.html:
    ensure_folder(os.path.join(config.target_folder, 'trees'))
    _fill_and_write_template(
        jinja_env,
        'index.html',
        os.path.join(config.target_folder, 'trees', 'index.html'),
        {'wwwroot': config.wwwroot,
         'tree': config.trees[0].name,
         'trees': [t.name for t in config.trees],
         'config': config.template_parameters,
         'generated_date': config.generated_date})
    # TODO Make open-search.xml things (or make the server so it can do them!)

    # Build trees requested
    for tree in trees:
        # Note starting time
        start_time = datetime.now()

        # Create folders (delete if exists)
        ensure_folder(tree.target_folder, True)  # <config.target_folder>/<tree.name>
        # Object folder (user defined!): only clean if not the srcdir
        ensure_folder(tree.object_folder,
                      tree.source_folder != tree.object_folder)
        ensure_folder(tree.temp_folder, True)    # <config.temp_folder>/<tree.name>
                                                 # (or user defined)
        ensure_folder(tree.log_folder, True)     # <config.log_folder>/<tree.name>
                                                 # (or user defined)
        # Temporary folders for plugins
        ensure_folder(os.path.join(tree.temp_folder, 'plugins'), True)
        for plugin in tree.enabled_plugins:      # <tree.config>/plugins/<plugin>
            ensure_folder(os.path.join(tree.temp_folder, 'plugins', plugin),
                          True)

        # Connect to database (exits on failure: sqlite_version, tokenizer, etc)
        conn = connect_database(tree)
        try:
            # Create database tables
            create_tables(tree, conn)

            # Index all source files (for full text search)
            # Also build all folder listing while we're at it
            index_files(tree, conn)

            # Build tree
            build_tree(tree, conn, verbose)

            # Optimize and run integrity check on database
            finalize_database(conn)

            # Commit database
            conn.commit()

            # Build html
            run_html_workers(tree, conn)

            conn.commit()
        finally:
            # Close connection, even when one of the build steps above raised,
            # so a failed build does not leave the connection open.
            conn.close()

        # Save the tree finish time
        delta = datetime.now() - start_time
        print("(finished building '%s' in %s)" % (tree.name, delta))