import os
import sys

import dxr
import dxr.languages


def builddb(treecfg, dbdir):
  """ Post-process the build and make the SQL directory """
  global big_blob

  # We use this all over the place, so cache it here.
  plugins = dxr.get_active_plugins(treecfg)

  # Building the database happens in multiple phases. In the first phase, we
  # collect all of the information and organize it. In the second phase, we
  # link the data across multiple languages.
  print "Post-processing the source files..."
  big_blob = {}
  srcdir = treecfg.sourcedir
  objdir = treecfg.objdir

  # Building with srcdir == objdir is not supported; normalize both paths
  # before comparing so a trailing slash can't mask a match.
  if os.path.normpath(srcdir) == os.path.normpath(objdir):
    print "objdir must not be the same as srcdir"
    sys.exit(1)

  for plugin in plugins:
    if 'post_process' in plugin.__all__:
      big_blob[plugin.__name__] = plugin.post_process(srcdir, objdir)

  # Save off the raw data blob.
  print "Storing data..."
  dxr.store_big_blob(treecfg, big_blob)

  # Build the SQL for later queries. This combines the main language schema
  # with plugin-specific information. The pragmas make the SQL stage go
  # faster.
  print "Building SQL..."
  dbname = dxr.get_database_filename(treecfg)
  conn = dxr.open_database(dbname,
    'PRAGMA synchronous=off; PRAGMA page_size=65536;')

  # Import the schemata.
  schemata = [dxr.languages.get_standard_schema()]
  for plugin in plugins:
    schemata.append(plugin.get_schema())
  conn.executescript('\n'.join(schemata))
  conn.commit()

  # Load and run the SQL. Each statement is either a plain SQL string or a
  # (sql, params) tuple for parameterized execution.
  def sql_generator():
    for statement in dxr.languages.get_sql_statements():
      yield statement
    for plugin in plugins:
      if plugin.__name__ in big_blob:
        plugblob = big_blob[plugin.__name__]
        for statement in plugin.sqlify(plugblob):
          yield statement

  for stmt in sql_generator():
    if isinstance(stmt, tuple):
      conn.execute(stmt[0], stmt[1])
    else:
      conn.execute(stmt)
  conn.commit()
  conn.close()
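
# A minimal sketch of the plugin interface that builddb relies on, derived
# from the calls above (plugin.__all__, post_process, get_schema, sqlify).
# Everything else -- the table, the blob layout -- is hypothetical, and a
# real plugin would live in its own module rather than in this file.

__all__ = ['post_process', 'get_schema', 'sqlify']

def post_process(srcdir, objdir):
  """ Phase one: collect raw data from the build. """
  # The returned blob just needs to round-trip through dxr.store_big_blob
  # and come back to sqlify; any picklable structure works.
  return {'files': [(os.path.join(srcdir, 'example.c'), 42)]}

def get_schema():
  """ DDL that builddb appends to the standard schema. """
  return 'CREATE TABLE IF NOT EXISTS example_files (path TEXT, loc INTEGER);'

def sqlify(blob):
  """ Phase two: turn the stored blob into SQL statements.

  Yield plain strings for bare SQL, or (sql, params) tuples -- builddb
  dispatches on isinstance(stmt, tuple) and runs tuples parameterized.
  """
  for path, loc in blob['files']:
    yield ('INSERT INTO example_files (path, loc) VALUES (?, ?)', (path, loc))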
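
# Hypothetical usage sketch. In DXR the tree config comes from the parsed
# config file; builddb reads only .sourcedir and .objdir directly and passes
# treecfg through to the dxr helpers. The stub class and paths below are
# illustrations, not real DXR API.

if __name__ == '__main__':
  class StubTreeConfig(object):
    sourcedir = '/src/mozilla-central'
    objdir = '/obj/mozilla-central'
    tree = 'mozilla-central'

  builddb(StubTreeConfig(), '/var/dxr/db')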