def _build_html_for_file_ids(tree, start, end): """Write HTML files for file IDs from ``start`` to ``end``. Return None if all goes well, a tuple of (stringified exception, exc type, exc value, file ID, file path) if something goes wrong while htmlifying a file. This is the top-level function of an HTML worker process. Log progress to a file named "build-html-<start>-<end>.log". """ path = '(no file yet)' id = -1 try: # We might as well have this write its log directly rather than returning # them to the master process, since it's already writing the built HTML # directly, since that probably yields better parallelism. conn = connect_db(tree.target_folder) # TODO: Replace this ad hoc logging with the logging module (or something # more humane) so we can get some automatic timestamps. If we get # timestamps spit out in the parent process, we don't need any of the # timing or counting code here. with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log: # Load htmlifier plugins: plugins = load_htmlifiers(tree) for plugin in plugins: plugin.load(tree, conn) start_time = datetime.now() # Fetch and htmlify each document: for num_files, (id, path, icon, text) in enumerate( conn.execute(""" SELECT files.id, path, icon, trg_index.text FROM trg_index, files WHERE trg_index.id = files.id AND trg_index.id >= ? AND trg_index.id <= ? """, [start, end]), 1): dst_path = os.path.join(tree.target_folder, path + '.html') log.write('Starting %s.\n' % path) htmlify(tree, conn, icon, path, text, dst_path, plugins) conn.commit() conn.close() # Write time information: time = datetime.now() - start_time log.write('Finished %s files in %s.\n' % (num_files, time)) except Exception as exc: type, value, traceback = exc_info() return format_exc(), type, value, id, path
def direct_result_eq(self, query_text, line_num):
    """Check that ``query_text`` has ``("main.cpp", line_num)`` as its direct result.

    Connects with ``connect_db("code", <config dir>/target)``, runs the query,
    and compares its ``direct_result()`` to the expected pair using ``eq_``.

    """
    target_dir = os.path.join(self._config_dir_path, "target")
    query = Query(connect_db("code", target_dir), query_text)
    expected = ("main.cpp", line_num)
    eq_(query.direct_result(), expected)
def search(tree):
    """Search by regex, caller, superclass, or whatever.

    :arg tree: The name of the tree to search. If it is not one of the
        configured ``TREES``, an error is returned with status 404.

    Reads these request values: ``q`` (the query text), ``offset``, ``limit``
    (capped at 1000), ``case`` and ``redirect`` (both enabled by the literal
    string ``'true'``), ``explain`` (enabled by mere presence), and ``format``
    (``'json'`` selects a JSON response).

    Return a redirect to the source page when a direct result is requested and
    found. Otherwise return the results as JSON or as the rendered
    ``search.html`` template. On error, return JSON or the rendered
    ``error.html`` template paired with status 404 or 500.

    """
    # TODO: This function still does too much.
    querystring = request.values

    offset = non_negative_int(querystring.get('offset'), 0)
    limit = min(non_negative_int(querystring.get('limit'), 100), 1000)

    config = current_app.config
    www_root = config['WWW_ROOT']
    trees = config['TREES']

    # Arguments for the template:
    arguments = {
        # Common template variables
        'wwwroot': www_root,
        'generated_date': config['GENERATED_DATE']}

    error = warning = ''
    status_code = None

    if tree in trees:
        arguments['tree'] = tree

        # Connect to database
        conn = connect_db(tree, current_app.instance_path)
        if conn:
            # Parse the search query
            qtext = querystring.get('q', '')
            is_case_sensitive = querystring.get('case') == 'true'
            q = Query(conn,
                      qtext,
                      should_explain='explain' in querystring,
                      is_case_sensitive=is_case_sensitive)

            # Try for a direct result:
            if querystring.get('redirect') == 'true':
                result = q.direct_result()
                if result:
                    path, line = result
                    # qtext is user input: percent-encode it so characters
                    # like '&' or '#' can't split the query string or swallow
                    # the line-number fragment.
                    return redirect(
                        '%s/%s/source/%s?from=%s%s#%i' %
                        (www_root,
                         tree,
                         path,
                         _quote_query_value(qtext),
                         '&case=true' if is_case_sensitive else '',
                         line))

            # Return multiple results:
            start = time()
            try:
                results = list(q.results(offset, limit))
            except sqlite3.OperationalError as e:
                if e.message.startswith('REGEXP:'):
                    # Malformed regex
                    warning = e.message[7:]
                    results = []
                elif e.message.startswith('QUERY:'):
                    warning = e.message[6:]
                    results = []
                else:
                    error = 'Database error: %s' % e.message

            if not error:
                # Search template variables:
                arguments['time'] = time() - start
                arguments['query'] = qtext
                arguments['search_url'] = search_url(www_root,
                                                     arguments['tree'],
                                                     qtext,
                                                     redirect=False)
                arguments['results'] = results
                arguments['offset'] = offset
                arguments['limit'] = limit
                arguments['is_case_sensitive'] = is_case_sensitive
                arguments['tree_tuples'] = [
                    (t,
                     search_url(www_root,
                                t,
                                qtext,
                                case=True if is_case_sensitive else None),
                     description)
                    for t, description in trees.iteritems()]
        else:
            error = 'Failed to establish database connection.'
    else:
        # Templates still need some tree to render against, so fall back to
        # the first configured one.
        arguments['tree'] = trees.keys()[0]
        error = "Tree '%s' is not a valid tree." % tree
        status_code = 404

    if warning or error:
        arguments['error'] = error or warning

    if querystring.get('format') == 'json':
        if error:
            # Return a non-OK code so the live search doesn't try to replace
            # the results with our empty ones:
            return jsonify(arguments), status_code or 500

        # Tuples are encoded as lists in JSON, and these are not real
        # easy to unpack or read in Javascript. So for ease of use, we
        # convert to dictionaries before returning the json results.
        # If further discrepancies are introduced, please document them in
        # templating.mkd.
        arguments['results'] = [
            {'icon': icon,
             'path': path,
             'lines': [{'line_number': nb, 'line': l} for nb, l in lines]}
            for icon, path, lines in arguments['results']]
        return jsonify(arguments)

    if error:
        return render_template('error.html', **arguments), status_code or 500
    else:
        arguments['filters'] = filter_menu_items()
        return render_template('search.html', **arguments)


def _quote_query_value(text):
    """Percent-encode ``text`` for use as a URL query-string value."""
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3
    if not isinstance(text, bytes):
        # Encode first: Python 2's quote() fails on non-ASCII unicode input.
        text = text.encode('utf-8')
    # safe='' so that '/' is escaped as well as '&', '#', '=' and friends.
    return quote(text, safe='')
def build_instance(config_path, nb_jobs=None, tree=None, verbose=False):
    """Build a DXR instance.

    :arg config_path: The path to a config file
    :arg nb_jobs: The number of parallel jobs to pass into ``make``. Defaults
        to whatever the config file says.
    :arg tree: The name of a single tree to build. Defaults to all the trees
        in the config file. If no configured tree has that name, an error is
        printed to stderr and the process exits with status 1.
    :arg verbose: Passed through to ``build_tree``.

    When the worker pool is disabled and htmlifying a file fails, the failure
    is printed to stderr and the process exits with status 1.

    """
    # Load configuration file
    # (this will abort on inconsistencies)
    overrides = {}
    if nb_jobs:
        # TODO: Remove this brain-dead cast when we get the types right in the
        # Config object:
        overrides['nb_jobs'] = str(nb_jobs)
    config = Config(config_path, **overrides)

    skip_indexing = 'index' in config.skip_stages

    # Find trees to make, fail if requested tree isn't available
    if tree:
        trees = [t for t in config.trees if t.name == tree]
        if not trees:
            sys.stderr.write(
                "Tree '%s' is not defined in config file!\n" % tree)
            sys.exit(1)
    else:
        # Build everything if no tree is provided
        trees = config.trees

    # Create config.target_folder (if not exists)
    print("Generating target folder")
    ensure_folder(config.target_folder, False)
    ensure_folder(config.temp_folder, not skip_indexing)
    ensure_folder(config.log_folder, not skip_indexing)

    jinja_env = load_template_env(config.temp_folder, config.dxrroot)

    # We don't want to load config file on the server, so we just write all the
    # setting into the config.py script, simple as that.
    _fill_and_write_template(
        jinja_env,
        'config.py.jinja',
        os.path.join(config.target_folder, 'config.py'),
        dict(trees=repr(OrderedDict((t.name, t.description)
                                    for t in config.trees)),
             wwwroot=repr(config.wwwroot),
             generated_date=repr(config.generated_date),
             directory_index=repr(config.directory_index),
             default_tree=repr(config.default_tree)))

    # Create jinja cache folder in target folder
    ensure_folder(os.path.join(config.target_folder, 'jinja_dxr_cache'))

    # TODO Make open-search.xml things (or make the server so it can do them!)

    # Build trees requested
    ensure_folder(os.path.join(config.target_folder, 'trees'))
    # The loop variable is deliberately not called ``tree`` so it doesn't
    # clobber the ``tree`` argument (a name, not a tree object).
    for current in trees:
        # Note starting time
        start_time = datetime.now()

        # Create folders (delete if exists)
        # <config.target_folder>/<tree.name>
        ensure_folder(current.target_folder, not skip_indexing)
        # Object folder (user defined!); only clean if not the srcdir
        ensure_folder(current.object_folder,
                      current.source_folder != current.object_folder)
        # <config.temp_folder>/<tree.name> (or user defined)
        ensure_folder(current.temp_folder, not skip_indexing)
        # <config.log_folder>/<tree.name> (or user defined)
        ensure_folder(current.log_folder, not skip_indexing)
        # Temporary folders for plugins
        ensure_folder(os.path.join(current.temp_folder, 'plugins'),
                      not skip_indexing)
        for plugin in current.enabled_plugins:
            # <tree.config>/plugins/<plugin>
            ensure_folder(os.path.join(current.temp_folder, 'plugins', plugin),
                          not skip_indexing)

        # Connect to database (exits on failure: sqlite_version, tokenizer, etc)
        conn = connect_db(current.target_folder)
        try:
            if skip_indexing:
                print(" - Skipping indexing (due to 'index' in 'skip_stages')")
            else:
                # Create database tables
                create_tables(current, conn)

                # Index all source files (for full text search)
                # Also build all folder listing while we're at it
                index_files(current, conn)

                # Build tree
                build_tree(current, conn, verbose)

                # Optimize and run integrity check on database
                finalize_database(conn)

                # Commit database
                conn.commit()

            if 'html' in config.skip_stages:
                print(" - Skipping htmlifying (due to 'html' in 'skip_stages')")
            else:
                print("Building HTML for the '%s' tree." % current.name)

                max_file_id = conn.execute(
                    "SELECT max(files.id) FROM files").fetchone()[0]
                if config.disable_workers:
                    print(" - Worker pool disabled (due to 'disable_workers')")
                    failure = _build_html_for_file_ids(current, 0, max_file_id)
                    if failure is not None:
                        # The helper returns its errors instead of raising
                        # them; don't let a failed build pass for a finished
                        # one.
                        formatted_tb, _, _, file_id, path = failure
                        sys.stderr.write(
                            'Failed while htmlifying %s, id=%s:\n%s\n' %
                            (path, file_id, formatted_tb))
                        sys.exit(1)
                else:
                    run_html_workers(current, config, max_file_id)

            conn.commit()
        finally:
            # Close connection, even when the build is aborted part-way
            conn.close()

        # Save the tree finish time
        delta = datetime.now() - start_time
        print("(finished building '%s' in %s)" % (current.name, delta))
def direct_result_eq(self, query_text, line_num):
    """Check that ``query_text`` yields ``('main.cpp', line_num)`` directly.

    The connection comes from ``connect_db('code', <config dir>/target)``;
    the query's ``direct_result()`` is compared to the expected pair with
    ``eq_``.

    """
    db_root = os.path.join(self._config_dir_path, 'target')
    connection = connect_db('code', db_root)
    actual = Query(connection, query_text).direct_result()
    eq_(actual, ('main.cpp', line_num))
def direct_result_eq(self, query_text, line_num):
    """Check that ``query_text`` yields ``('main.cpp', line_num)`` directly.

    The connection comes from ``connect_db`` called on the
    ``target/trees/code`` folder under this test's config directory; the
    query's ``direct_result()`` is compared to the expected pair with ``eq_``.

    """
    tree_folder = os.path.join(
        self._config_dir_path, 'target', 'trees', 'code')
    actual = Query(connect_db(tree_folder), query_text).direct_result()
    eq_(actual, ('main.cpp', line_num))
def search(tree):
    """Search by regex, caller, superclass, or whatever.

    :arg tree: The name of the tree to search. If it is not in the configured
        ``TREES``, an error is returned with status 404.

    Reads these request values: ``q`` (the query text), ``offset``, ``limit``,
    ``redirect`` (enabled by the literal string ``'true'``), ``explain``
    (enabled by mere presence), and ``format`` (``'json'`` selects a JSON
    response).

    Return a redirect to the source page when a direct result is requested and
    found. Otherwise return the results as JSON or as the rendered
    ``search.html`` template. On error, return JSON or the rendered
    ``error.html`` template paired with status 404 or 500.

    """
    # TODO: This function still does too much.
    querystring = request.values

    offset = non_negative_int(querystring.get('offset'), 0)
    limit = non_negative_int(querystring.get('limit'), 100)

    config = current_app.config

    # Arguments for the template:
    arguments = {
        # Common template variables
        'wwwroot': config['WWW_ROOT'],
        # Default to the first tree; replaced below when ``tree`` is valid.
        'tree': config['TREES'][0],
        'trees': config['TREES'],
        'config': config['TEMPLATE_PARAMETERS'],
        'generated_date': config['GENERATED_DATE']
    }

    error = warning = ''
    status_code = None

    if tree in config['TREES']:
        arguments['tree'] = tree

        # Connect to database
        conn = connect_db(tree, current_app.instance_path)
        if conn:
            # Parse the search query
            qtext = querystring.get('q', '')
            q = Query(conn, qtext, should_explain='explain' in querystring)

            # Try for a direct result:
            if querystring.get('redirect') == 'true':
                result = q.direct_result()
                if result:
                    path, line = result
                    # qtext is user input: percent-encode it so characters
                    # like '&' or '#' can't split the query string or swallow
                    # the line-number fragment.
                    return redirect(
                        '%s/%s/source/%s?from=%s#l%i' %
                        (config['WWW_ROOT'],
                         tree,
                         path,
                         _quote_query_value(qtext),
                         line))

            # Return multiple results:
            start = time()
            try:
                results = list(q.results(offset, limit))
            except OperationalError as e:
                if e.message.startswith('REGEXP:'):
                    # Malformed regex
                    warning = e.message[7:]
                    results = []
                elif e.message.startswith('QUERY:'):
                    warning = e.message[6:]
                    results = []
                else:
                    error = 'Database error: %s' % e.message

            if not error:
                # Search template variables:
                arguments['query'] = qtext
                arguments['search_url'] = search_url(arguments['wwwroot'],
                                                     arguments['tree'],
                                                     qtext,
                                                     redirect=False)
                arguments['results'] = results
                arguments['offset'] = offset
                arguments['limit'] = limit
                arguments['time'] = time() - start
        else:
            error = 'Failed to establish database connection.'
    else:
        error = "Tree '%s' is not a valid tree." % tree
        status_code = 404

    if warning or error:
        arguments['error'] = error or warning

    if querystring.get('format') == 'json':
        if error:
            # Return a non-OK code so the live search doesn't try to replace
            # the results with our empty ones:
            return jsonify(arguments), status_code or 500

        # Tuples are encoded as lists in JSON, and these are not real
        # easy to unpack or read in Javascript. So for ease of use, we
        # convert to dictionaries before returning the json results.
        # If further discrepancies are introduced, please document them in
        # templating.mkd.
        arguments['results'] = [{
            'icon': icon,
            'path': path,
            'lines': [{
                'line_number': nb,
                'line': l
            } for nb, l in lines]
        } for icon, path, lines in arguments['results']]
        return jsonify(arguments)

    if error:
        return render_template('error.html', **arguments), status_code or 500
    else:
        return render_template('search.html', **arguments)


def _quote_query_value(text):
    """Percent-encode ``text`` for use as a URL query-string value."""
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3
    if not isinstance(text, bytes):
        # Encode first: Python 2's quote() fails on non-ASCII unicode input.
        text = text.encode('utf-8')
    # safe='' so that '/' is escaped as well as '&', '#', '=' and friends.
    return quote(text, safe='')
def direct_result_eq(self, query_text, line_num):
    """Check that ``query_text`` yields ``('main.cpp', line_num)`` directly.

    Runs the query over the connection returned by
    ``connect_db('code', <config dir>/target)`` and hands the actual and
    expected direct results to ``eq_``.

    """
    wanted = ('main.cpp', line_num)
    conn = connect_db('code',
                      os.path.join(self._config_dir_path, 'target'))
    found = Query(conn, query_text).direct_result()
    eq_(found, wanted)
def search(): """Search by regex, caller, superclass, or whatever.""" # TODO: This ugly mess is marring the rest of this file. Rewrite it. # Load query parameters querystring = request.values # Get output format output_format = querystring.get("format", "html") if output_format not in ("html", "json"): output_format = "html" # Decide if we can redirect can_redirect = querystring.get("redirect", "true") == "true" # Find the offset and limit # TODO Handle parsing errors that could occur here offset = int(querystring.get("offset", 0)) limit = int(querystring.get("limit", 100)) # Get and validate tree tree = querystring.get("tree") config = current_app.config if tree not in config["TREES"]: # Arguments for the template arguments = { # Common Template Variables "wwwroot": config["WWW_ROOT"], "tree": config["TREES"][0], "trees": config["TREES"], "generated_date": config["GENERATED_DATE"], "config": config["TEMPLATE_PARAMETERS"], # Error template Variables "error": "Tree '%s' is not a valid tree." % tree, } template = "error.html" else: # Parse the search query qtext = querystring.get("q", "").decode("utf-8") q = Query(qtext) # Connect to database conn = connect_db(tree, current_app.instance_path) # Arguments for the template arguments = { # Common Template Variables "wwwroot": config["WWW_ROOT"], "tree": tree, "trees": config["TREES"], "config": config["TEMPLATE_PARAMETERS"], "generated_date": config["GENERATED_DATE"], } if conn: result = None if can_redirect: result = direct_result(conn, q) if result: path, line = result # TODO: Does this escape qtext properly? 
return redirect("%s/%s/%s?from=%s#l%i" % (config["WWW_ROOT"], tree, path, qtext, line)) # Okay let's try to make search results template = "search.html" # Catching any errors from sqlite, typically, regexp errors error = None start = time() try: results = list(fetch_results(conn, q, offset, limit, querystring.has_key("explain"))) except OperationalError, e: if e.message.startswith("REGEXP:"): arguments["error"] = e.message[7:] results = [] elif e.message.startswith("QUERY:"): arguments["error"] = e.message[6:] results = [] else: arguments["error"] = "Database error '%s'" % e.message template = "error.html" if template == "search.html": # Search Template Variables arguments["query"] = escape(qtext) arguments["results"] = results arguments["offset"] = offset arguments["limit"] = limit arguments["time"] = time() - start else: