Example #1
0
def _build_html_for_file_ids(tree, start, end):
    """Write HTML files for file IDs from ``start`` to ``end``, inclusive.

    Return None if all goes well, or a tuple of (formatted traceback, exc
    type, exc value, file ID, file path) if something goes wrong. The ID and
    path are those of the file being htmlified when the error occurred, or -1
    and '(no file yet)' if it happened before the first file was fetched.

    This is the top-level function of an HTML worker process. Log progress to a
    file named "build-html-<start>-<end>.log".

    """
    path = '(no file yet)'
    file_id = -1
    conn = None
    try:
        # We might as well have this write its log directly rather than
        # returning it to the master process: it's already writing the built
        # HTML directly, and that probably yields better parallelism.

        conn = connect_db(tree.target_folder)
        # TODO: Replace this ad hoc logging with the logging module (or something
        # more humane) so we can get some automatic timestamps. If we get
        # timestamps spit out in the parent process, we don't need any of the
        # timing or counting code here.
        with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log:
            # Load htmlifier plugins:
            plugins = load_htmlifiers(tree)
            for plugin in plugins:
                plugin.load(tree, conn)

            start_time = datetime.now()

            # Start the count at zero so an ID range that matches no files
            # still finishes cleanly rather than failing on an unbound counter
            # when the summary line is written.
            num_files = 0

            # Fetch and htmlify each document:
            rows = conn.execute("""
                                SELECT files.id, path, icon, trg_index.text
                                FROM trg_index, files
                                WHERE trg_index.id = files.id
                                AND trg_index.id >= ?
                                AND trg_index.id <= ?
                                """,
                                [start, end])
            for num_files, (file_id, path, icon, text) in enumerate(rows, 1):
                dst_path = os.path.join(tree.target_folder, path + '.html')
                log.write('Starting %s.\n' % path)
                htmlify(tree, conn, icon, path, text, dst_path, plugins)

            conn.commit()

            # Write time information:
            elapsed = datetime.now() - start_time
            log.write('Finished %s files in %s.\n' % (num_files, elapsed))
    except Exception:
        # Hand back only values the caller can use; the traceback object
        # itself is not returned, just its formatted text.
        exc_type, exc_value = exc_info()[:2]
        return format_exc(), exc_type, exc_value, file_id, path
    finally:
        # Release the connection on both the success and the failure path.
        if conn is not None:
            conn.close()
Example #2
0
 def direct_result_eq(self, query_text, line_num):
     """Check that ``query_text`` has a direct result at ``line_num`` of main.cpp.

     The query is run against the "code" tree under this test's "target"
     folder, and the outcome is compared with ``eq_``.

     """
     target_dir = os.path.join(self._config_dir_path, "target")
     query = Query(connect_db("code", target_dir), query_text)
     expected = ("main.cpp", line_num)
     eq_(query.direct_result(), expected)
Example #3
0
File: app.py Project: imclab/dxr
def search(tree):
    """Search by regex, caller, superclass, or whatever.

    Read ``q``, ``offset``, ``limit``, ``case``, ``redirect``, ``explain``,
    and ``format`` from the request. If ``redirect=true`` and the query has a
    direct result, respond with a redirect to that line of the source page.
    Otherwise respond with the search results, or an error, as JSON when
    ``format=json`` and as a rendered template when not.

    An unknown ``tree`` yields a 404 and any other error a 500. A malformed
    regex or query is reported as a warning alongside empty results.

    """
    # TODO: This function still does too much.
    querystring = request.values

    offset = non_negative_int(querystring.get('offset'), 0)
    # Cap the page size no matter what the client asks for:
    limit = min(non_negative_int(querystring.get('limit'), 100), 1000)

    config = current_app.config
    www_root = config['WWW_ROOT']
    trees = config['TREES']

    # Arguments for the template:
    arguments = {
        # Common template variables
        'wwwroot': www_root,
        'generated_date': config['GENERATED_DATE']}

    error = warning = ''
    status_code = None

    if tree in trees:
        arguments['tree'] = tree

        # Connect to database
        conn = connect_db(tree, current_app.instance_path)
        if conn:
            # Parse the search query
            qtext = querystring.get('q', '')
            is_case_sensitive = querystring.get('case') == 'true'
            q = Query(conn,
                      qtext,
                      should_explain='explain' in querystring,
                      is_case_sensitive=is_case_sensitive)

            # Try for a direct result:
            if querystring.get('redirect') == 'true':
                result = q.direct_result()
                if result:
                    path, line = result
                    # qtext comes straight from the request, so percent-encode
                    # it. Otherwise a '&' or '#' in the query would corrupt
                    # the "from" parameter or the line-number fragment.
                    # Encode to UTF-8 first: Python 2's quote() chokes on
                    # non-ASCII unicode.
                    from urllib import quote
                    return redirect(
                        '%s/%s/source/%s?from=%s%s#%i' %
                        (www_root,
                         tree,
                         path,
                         quote(qtext.encode('utf-8'), safe=''),
                         '&case=true' if is_case_sensitive else '',
                         line))

            # Return multiple results:
            start = time()
            try:
                results = list(q.results(offset, limit))
            except sqlite3.OperationalError as e:
                message = e.message
                # Errors carrying these prefixes describe a malformed regex or
                # query; show the rest of the message as a warning next to an
                # empty result list.
                for prefix in ('REGEXP:', 'QUERY:'):
                    if message.startswith(prefix):
                        warning = message[len(prefix):]
                        results = []
                        break
                else:
                    error = 'Database error: %s' % message
            if not error:
                # Search template variables:
                arguments['time'] = time() - start
                arguments['query'] = qtext
                arguments['search_url'] = search_url(www_root,
                                                     arguments['tree'],
                                                     qtext,
                                                     redirect=False)
                arguments['results'] = results
                arguments['offset'] = offset
                arguments['limit'] = limit
                arguments['is_case_sensitive'] = is_case_sensitive
                arguments['tree_tuples'] = [
                        (t,
                         search_url(www_root,
                                    t,
                                    qtext,
                                    case=True if is_case_sensitive else None),
                         description)
                        for t, description in trees.iteritems()]
        else:
            error = 'Failed to establish database connection.'
    else:
        # Templates still need some tree to render against, so fall back to
        # the first configured one.
        arguments['tree'] = trees.keys()[0]
        error = "Tree '%s' is not a valid tree." % tree
        status_code = 404

    if warning or error:
        arguments['error'] = error or warning

    if querystring.get('format') == 'json':
        if error:
            # Return a non-OK code so the live search doesn't try to replace
            # the results with our empty ones:
            return jsonify(arguments), status_code or 500

        # Tuples are encoded as lists in JSON, and these are not real
        # easy to unpack or read in Javascript. So for ease of use, we
        # convert to dictionaries before returning the json results.
        # If further discrepancies are introduced, please document them in
        # templating.mkd.
        arguments['results'] = [
            {'icon': icon,
             'path': path,
             'lines': [{'line_number': nb, 'line': l} for nb, l in lines]}
                for icon, path, lines in arguments['results']]
        return jsonify(arguments)

    if error:
        return render_template('error.html', **arguments), status_code or 500
    else:
        arguments['filters'] = filter_menu_items()
        return render_template('search.html', **arguments)
Example #4
0
def build_instance(config_path, nb_jobs=None, tree=None, verbose=False):
    """Build a DXR instance.

    :arg config_path: The path to a config file
    :arg nb_jobs: The number of parallel jobs to pass into ``make``. Defaults
        to whatever the config file says.
    :arg tree: The name of a single tree to build. Defaults to all the trees
        in the config file.
    :arg verbose: Passed through to ``build_tree``

    Exit the process with status 1 if ``tree`` is not defined in the config
    file, or if htmlifying fails while the worker pool is disabled.

    """
    # Load configuration file
    # (this will abort on inconsistencies)
    overrides = {}
    if nb_jobs:
        # TODO: Remove this brain-dead cast when we get the types right in the
        # Config object:
        overrides['nb_jobs'] = str(nb_jobs)
    config = Config(config_path, **overrides)

    skip_indexing = 'index' in config.skip_stages

    # Find trees to make, fail if requested tree isn't available
    if tree:
        trees = [t for t in config.trees if t.name == tree]
        if not trees:
            sys.stderr.write(
                "Tree '%s' is not defined in config file!\n" % tree)
            sys.exit(1)
    else:
        # Build everything if no tree is provided
        trees = config.trees

    # Create config.target_folder (if not exists)
    print("Generating target folder")
    ensure_folder(config.target_folder, False)
    ensure_folder(config.temp_folder, not skip_indexing)
    ensure_folder(config.log_folder, not skip_indexing)

    jinja_env = load_template_env(config.temp_folder, config.dxrroot)

    # We don't want to load config file on the server, so we just write all the
    # setting into the config.py script, simple as that.
    _fill_and_write_template(
        jinja_env,
        'config.py.jinja',
        os.path.join(config.target_folder, 'config.py'),
        dict(trees=repr(OrderedDict((t.name, t.description)
                                    for t in config.trees)),
             wwwroot=repr(config.wwwroot),
             generated_date=repr(config.generated_date),
             directory_index=repr(config.directory_index),
             default_tree=repr(config.default_tree)))

    # Create jinja cache folder in target folder
    ensure_folder(os.path.join(config.target_folder, 'jinja_dxr_cache'))

    # TODO Make open-search.xml things (or make the server so it can do them!)

    # Build trees requested
    ensure_folder(os.path.join(config.target_folder, 'trees'))
    # A distinct loop name keeps the ``tree`` argument (a tree *name*) from
    # being clobbered by tree objects.
    for current_tree in trees:
        # Note starting time
        start_time = datetime.now()

        # Create folders (delete if exists)
        # <config.target_folder>/<tree.name>
        ensure_folder(current_tree.target_folder, not skip_indexing)
        # Object folder (user defined!); only clean if not the srcdir
        ensure_folder(
            current_tree.object_folder,
            current_tree.source_folder != current_tree.object_folder)
        # <config.temp_folder>/<tree.name> (or user defined)
        ensure_folder(current_tree.temp_folder, not skip_indexing)
        # <config.log_folder>/<tree.name> (or user defined)
        ensure_folder(current_tree.log_folder, not skip_indexing)
        # Temporary folders for plugins
        plugins_folder = os.path.join(current_tree.temp_folder, 'plugins')
        ensure_folder(plugins_folder, not skip_indexing)
        for plugin in current_tree.enabled_plugins:
            # <tree.config>/plugins/<plugin>
            ensure_folder(os.path.join(plugins_folder, plugin),
                          not skip_indexing)

        # Connect to database (exits on failure: sqlite_version, tokenizer, etc)
        conn = connect_db(current_tree.target_folder)

        if skip_indexing:
            print(" - Skipping indexing (due to 'index' in 'skip_stages')")
        else:
            # Create database tables
            create_tables(current_tree, conn)

            # Index all source files (for full text search)
            # Also build all folder listing while we're at it
            index_files(current_tree, conn)

            # Build tree
            build_tree(current_tree, conn, verbose)

            # Optimize and run integrity check on database
            finalize_database(conn)

            # Commit database
            conn.commit()

        if 'html' in config.skip_stages:
            print(" - Skipping htmlifying (due to 'html' in 'skip_stages')")
        else:
            print("Building HTML for the '%s' tree." % current_tree.name)

            max_file_id = conn.execute(
                "SELECT max(files.id) FROM files").fetchone()[0]
            if config.disable_workers:
                print(" - Worker pool disabled (due to 'disable_workers')")
                # The in-process build reports failure through its return
                # value rather than by raising, so check it instead of
                # carrying on as though the HTML had been written.
                failure = _build_html_for_file_ids(
                    current_tree, 0, max_file_id)
                if failure:
                    formatted_tb, _, _, file_id, path = failure
                    sys.stderr.write(
                        "Failed while htmlifying %s, id=%s:\n%s\n" %
                        (path, file_id, formatted_tb))
                    sys.exit(1)
            else:
                run_html_workers(current_tree, config, max_file_id)

        # Close connection
        conn.commit()
        conn.close()

        # Save the tree finish time
        delta = datetime.now() - start_time
        print("(finished building '%s' in %s)" % (current_tree.name, delta))
Example #5
0
 def direct_result_eq(self, query_text, line_num):
     """Compare the direct result of ``query_text`` with main.cpp at ``line_num``.

     The query runs against the 'code' tree found under the 'target' folder
     of this test's config directory; ``eq_`` does the comparison.

     """
     instance_path = os.path.join(self._config_dir_path, 'target')
     connection = connect_db('code', instance_path)
     actual = Query(connection, query_text).direct_result()
     eq_(actual, ('main.cpp', line_num))
Example #6
0
 def direct_result_eq(self, query_text, line_num):
     """Check that ``query_text`` leads directly to ``line_num`` of main.cpp.

     The database is opened from the 'target/trees/code' folder beneath this
     test's config directory, and the outcome is compared with ``eq_``.

     """
     db_dir = os.path.join(self._config_dir_path, 'target', 'trees', 'code')
     query = Query(connect_db(db_dir), query_text)
     eq_(query.direct_result(), ('main.cpp', line_num))
Example #7
0
def search(tree):
    """Search by regex, caller, superclass, or whatever.

    Read ``q``, ``offset``, ``limit``, ``redirect``, ``explain``, and
    ``format`` from the request. If ``redirect=true`` and the query has a
    direct result, respond with a redirect to that line of the source page.
    Otherwise respond with the search results, or an error, as JSON when
    ``format=json`` and as a rendered template when not.

    An unknown ``tree`` yields a 404 and any other error a 500. A malformed
    regex or query is reported as a warning alongside empty results.

    """
    # TODO: This function still does too much.
    querystring = request.values

    offset = non_negative_int(querystring.get('offset'), 0)
    limit = non_negative_int(querystring.get('limit'), 100)

    config = current_app.config

    # Arguments for the template:
    arguments = {
        # Common template variables
        'wwwroot': config['WWW_ROOT'],
        # Placeholder; replaced below once ``tree`` is known to be valid.
        'tree': config['TREES'][0],
        'trees': config['TREES'],
        'config': config['TEMPLATE_PARAMETERS'],
        'generated_date': config['GENERATED_DATE']
    }

    error = warning = ''
    status_code = None

    if tree in config['TREES']:
        arguments['tree'] = tree

        # Connect to database
        conn = connect_db(tree, current_app.instance_path)
        if conn:
            # Parse the search query
            qtext = querystring.get('q', '')
            q = Query(conn, qtext, should_explain='explain' in querystring)

            # Try for a direct result:
            if querystring.get('redirect') == 'true':
                result = q.direct_result()
                if result:
                    path, line = result
                    # qtext comes straight from the request, so percent-encode
                    # it. Otherwise a '&' or '#' in the query would corrupt
                    # the "from" parameter or the line-number fragment.
                    # Encode to UTF-8 first: Python 2's quote() chokes on
                    # non-ASCII unicode.
                    from urllib import quote
                    return redirect(
                        '%s/%s/source/%s?from=%s#l%i' %
                        (config['WWW_ROOT'],
                         tree,
                         path,
                         quote(qtext.encode('utf-8'), safe=''),
                         line))

            # Return multiple results:
            start = time()
            try:
                results = list(q.results(offset, limit))
            except OperationalError as e:
                message = e.message
                # Errors carrying these prefixes describe a malformed regex or
                # query; show the rest of the message as a warning next to an
                # empty result list.
                for prefix in ('REGEXP:', 'QUERY:'):
                    if message.startswith(prefix):
                        warning = message[len(prefix):]
                        results = []
                        break
                else:
                    error = 'Database error: %s' % message
            if not error:
                # Search template variables:
                arguments['query'] = qtext
                arguments['search_url'] = search_url(arguments['wwwroot'],
                                                     arguments['tree'],
                                                     qtext,
                                                     redirect=False)
                arguments['results'] = results
                arguments['offset'] = offset
                arguments['limit'] = limit
                arguments['time'] = time() - start
        else:
            error = 'Failed to establish database connection.'
    else:
        error = "Tree '%s' is not a valid tree." % tree
        status_code = 404

    if warning or error:
        arguments['error'] = error or warning

    if querystring.get('format') == 'json':
        if error:
            # Return a non-OK code so the live search doesn't try to replace
            # the results with our empty ones:
            return jsonify(arguments), status_code or 500

        # Tuples are encoded as lists in JSON, and these are not real
        # easy to unpack or read in Javascript. So for ease of use, we
        # convert to dictionaries before returning the json results.
        # If further discrepancies are introduced, please document them in
        # templating.mkd.
        arguments['results'] = [
            {'icon': icon,
             'path': path,
             'lines': [{'line_number': nb, 'line': l} for nb, l in lines]}
            for icon, path, lines in arguments['results']]
        return jsonify(arguments)

    if error:
        return render_template('error.html', **arguments), status_code or 500
    else:
        return render_template('search.html', **arguments)
Example #8
0
 def direct_result_eq(self, query_text, line_num):
     """Verify that ``query_text`` resolves directly to main.cpp at ``line_num``.

     Opens the 'code' tree from the 'target' folder of this test's config
     directory and compares the query's direct result using ``eq_``.

     """
     target_path = os.path.join(self._config_dir_path, 'target')
     db = connect_db('code', target_path)
     found = Query(db, query_text).direct_result()
     wanted = ('main.cpp', line_num)
     eq_(found, wanted)
Example #9
0
File: app.py Project: jonasfj/dxr
def search():
    """Search by regex, caller, superclass, or whatever."""
    # TODO: This ugly mess is marring the rest of this file. Rewrite it.

    # Load query parameters
    querystring = request.values

    # Get output format
    output_format = querystring.get("format", "html")
    if output_format not in ("html", "json"):
        output_format = "html"

    # Decide if we can redirect
    can_redirect = querystring.get("redirect", "true") == "true"

    # Find the offset and limit
    # TODO Handle parsing errors that could occur here
    offset = int(querystring.get("offset", 0))
    limit = int(querystring.get("limit", 100))

    # Get and validate tree
    tree = querystring.get("tree")
    config = current_app.config
    if tree not in config["TREES"]:
        # Arguments for the template
        arguments = {
            # Common Template Variables
            "wwwroot": config["WWW_ROOT"],
            "tree": config["TREES"][0],
            "trees": config["TREES"],
            "generated_date": config["GENERATED_DATE"],
            "config": config["TEMPLATE_PARAMETERS"],
            # Error template Variables
            "error": "Tree '%s' is not a valid tree." % tree,
        }
        template = "error.html"
    else:
        # Parse the search query
        qtext = querystring.get("q", "").decode("utf-8")
        q = Query(qtext)
        # Connect to database
        conn = connect_db(tree, current_app.instance_path)
        # Arguments for the template
        arguments = {
            # Common Template Variables
            "wwwroot": config["WWW_ROOT"],
            "tree": tree,
            "trees": config["TREES"],
            "config": config["TEMPLATE_PARAMETERS"],
            "generated_date": config["GENERATED_DATE"],
        }
        if conn:
            result = None
            if can_redirect:
                result = direct_result(conn, q)
            if result:
                path, line = result
                # TODO: Does this escape qtext properly?
                return redirect("%s/%s/%s?from=%s#l%i" % (config["WWW_ROOT"], tree, path, qtext, line))
            # Okay let's try to make search results
            template = "search.html"
            # Catching any errors from sqlite, typically, regexp errors
            error = None
            start = time()
            try:
                results = list(fetch_results(conn, q, offset, limit, querystring.has_key("explain")))
            except OperationalError, e:
                if e.message.startswith("REGEXP:"):
                    arguments["error"] = e.message[7:]
                    results = []
                elif e.message.startswith("QUERY:"):
                    arguments["error"] = e.message[6:]
                    results = []
                else:
                    arguments["error"] = "Database error '%s'" % e.message
                    template = "error.html"
            if template == "search.html":
                # Search Template Variables
                arguments["query"] = escape(qtext)
                arguments["results"] = results
                arguments["offset"] = offset
                arguments["limit"] = limit
                arguments["time"] = time() - start
        else: