def run(self, repo, uri, db):
    """Prepare the file_types table for the repository at uri.

    Resumes a previous run when the table already exists: the id
    counter continues from the stored maximum and the already
    processed files are collected.
    """
    self.db = db
    path = uri_to_filename(uri)
    if path is not None:
        repo_uri = repo.get_uri_for_path(path)
    else:
        repo_uri = uri
    cnn = self.db.connect()
    cursor = cnn.cursor()
    cursor.execute(
        statement("SELECT id from repositories where uri = ?",
                  db.place_holder), (repo_uri, ))
    repo_id = cursor.fetchone()[0]
    files = []
    try:
        self.__create_table(cnn)
    except TableAlreadyExists:
        cursor.execute(
            statement("SELECT max(id) from file_types", db.place_holder))
        # Renamed from `id` to avoid shadowing the builtin.
        max_id = cursor.fetchone()[0]
        if max_id is not None:
            DBFileType.id_counter = max_id + 1
        files = self.__get_files_for_repository(repo_id, cursor)
    except Exception as e:
        # Py3-compatible except syntax (the file already uses `as` elsewhere).
        raise ExtensionRunError(str(e))
def run(self, repo, uri, db):
    """Prepare the patches table for the repository at uri.

    Resumes a previous run when the table already exists: the id
    counter continues from the stored maximum and the already
    processed commits are collected.
    """
    self.db = db
    self.repo = repo

    path = uri_to_filename(uri)
    if path is not None:
        repo_uri = repo.get_uri_for_path(path)
    else:
        repo_uri = uri
    # The original recomputed uri_to_filename(uri) here with the same
    # argument; reuse `path` instead of repeating the call.
    self.repo_uri = path or repo.get_uri()

    cnn = self.db.connect()
    cursor = cnn.cursor()
    cursor.execute(statement("SELECT id from repositories where uri = ?",
                             db.place_holder), (repo_uri,))
    repo_id = cursor.fetchone()[0]

    # If table does not exist, the list of commits is empty,
    # otherwise it will be filled within the except block below
    commits = []
    try:
        self.__create_table(cnn)
    except TableAlreadyExists:
        cursor.execute(statement("SELECT max(id) from patches",
                                 db.place_holder))
        # Renamed from `id` to avoid shadowing the builtin.
        max_id = cursor.fetchone()[0]
        if max_id is not None:
            DBPatch.id_counter = max_id + 1
        commits = self.__get_patches_for_repository(repo_id, cursor)
    except Exception as e:
        # Py3-compatible except syntax (the file already uses `as` elsewhere).
        raise ExtensionRunError(str(e))
def run(self, repo, uri, db):
    """Prepare the commits_lines table for the repository at uri.

    Resumes a previous run when the table already exists: the id
    counter continues from the stored maximum and the already
    processed commits are collected.
    """
    self.db = db
    path = uri_to_filename(uri)
    if path is not None:
        repo_uri = repo.get_uri_for_path(path)
    else:
        repo_uri = uri
    cnn = self.db.connect()
    cursor = cnn.cursor()
    cursor.execute(
        statement("SELECT id from repositories where uri = ?",
                  db.place_holder), (repo_uri, ))
    repo_id = cursor.fetchone()[0]
    # If table does not exist, the list of commits is empty,
    # otherwise it will be filled within the except block below
    commits = []
    try:
        self.__create_table(cnn)
    except TableAlreadyExists:
        cursor.execute(
            statement("SELECT max(id) from commits_lines", db.place_holder))
        # Renamed from `id` to avoid shadowing the builtin.
        max_id = cursor.fetchone()[0]
        if max_id is not None:
            DBCommitLines.id_counter = max_id + 1
        commits = self.__get_commits_lines_for_repository(repo_id, cursor)
    except Exception as e:
        # Py3-compatible except syntax (the file already uses `as` elsewhere).
        raise ExtensionRunError(str(e))
def run(self, repo, uri, db):
    """Prepare the file_types table for the repository at uri.

    Resumes a previous run when the table already exists: the id
    counter continues from the stored maximum and the already
    processed files are collected.
    """
    self.db = db
    path = uri_to_filename(uri)
    if path is not None:
        repo_uri = repo.get_uri_for_path(path)
    else:
        repo_uri = uri
    cnn = self.db.connect()
    cursor = cnn.cursor()
    cursor.execute(statement("SELECT id from repositories where uri = ?",
                             db.place_holder), (repo_uri,))
    repo_id = cursor.fetchone()[0]
    files = []
    try:
        self.__create_table(cnn)
    except TableAlreadyExists:
        cursor.execute(statement("SELECT max(id) from file_types",
                                 db.place_holder))
        # Renamed from `id` to avoid shadowing the builtin.
        max_id = cursor.fetchone()[0]
        if max_id is not None:
            DBFileType.id_counter = max_id + 1
        files = self.__get_files_for_repository(repo_id, cursor)
    except Exception as e:
        # Py3-compatible except syntax (the file already uses `as` elsewhere).
        raise ExtensionRunError(str(e))
def run(self, repo, uri, db):
    """
    Parses all the commit messages from the WordPress repository to
    identify code contributions made by developers without access to
    the repository.
    """
    self.db = db
    self.db_content_handler = DBContentHandler(self.db)
    self.db_content_handler.begin()

    path = uri_to_filename(uri)
    if path is not None:
        repo_uri = repo.get_uri_for_path(path)
    else:
        repo_uri = uri

    cnn = self.db.connect()
    cursor = cnn.cursor()
    cursor.execute(
        statement("SELECT id from repositories where uri = ?",
                  db.place_holder), (repo_uri, ))
    repo_id = cursor.fetchone()[0]
    self.db_content_handler.repo_id = repo_id

    self.__maybe_create_column(cnn)

    cursor.execute(
        statement("SELECT id, message from scmlog where repository_id = ?",
                  db.place_holder), (repo_id, ))
    write_cursor = cnn.cursor()
    batch = cursor.fetchmany()
    while batch:
        for log_id, message in batch:
            # Attribute the commit to the contributor named in the
            # message, when one could be extracted.
            author_id = self.__get_person_id_from_message(message)
            if author_id:
                write_cursor.execute(
                    statement(
                        "UPDATE scmlog SET wordpress_author_id = ? WHERE id = ?",
                        db.place_holder), (author_id, log_id))
        batch = cursor.fetchmany()
    cnn.commit()
    write_cursor.close()
    cursor.close()
    cnn.close()
def __process_finished_jobs(self, job_pool, write_cursor, db):
    """Drain finished content jobs and insert one `content` row each.

    Returns the number of jobs written.
    """
    # commit_id is the commit ID. For some reason, the
    # documentation advocates tablename_id as the reference,
    # but in the source, these are referred to as commit IDs.
    # Don't ask me why!
    done_count = 0
    job = job_pool.get_next_done(0)
    while job is not None:
        contents = None
        if not Config().no_content:
            contents = str(job.file_contents)
        query = """
            insert into content(commit_id, file_id, content, loc, size)
            values(?,?,?,?,?)"""
        execute_statement(statement(query, db.place_holder),
                          (job.commit_id, job.file_id, contents,
                           job.file_number_of_lines, job.file_size),
                          write_cursor, db,
                          "Couldn't insert, duplicate record?",
                          exception=ExtensionRunError)
        done_count += 1
        job = job_pool.get_next_done(0)
    return done_count
def __get_patches_for_repository(self, repo_id, cursor):
    """Return the commit ids that already have a patch stored for repo_id."""
    query = """SELECT p.commit_id from patches p, scmlog s
            WHERE p.commit_id = s.id and repository_id = ?"""
    cursor.execute(statement(query, self.db.place_holder), (repo_id,))
    return [row[0] for row in cursor.fetchall()]
def get_path(self, repo=None, repo_path=None):
    """Return the repository-relative path for the current row, or None.

    With no repo, the first linked path is taken. With a repo, the
    candidate links are walked until one whose first component is an
    ancestor of repo_path at this revision is found.
    """
    if not self.current:
        return None
    revision, commit_id, file_id, action_type, composed = self.current
    rev = revision.split("|")[0] if composed else revision

    cursor = self.cnn.cursor()
    cursor.execute(statement(self.__path_query__, self.db.place_holder),
                   (file_id, commit_id))
    link = cursor.fetchone()
    found = None
    if repo is None:
        found = link[1]
    else:
        try:
            while link:
                if repo.is_ancestor(repo_path, link[0], rev):
                    found = link[1]
                    break
                link = cursor.fetchone()
        except CommandError as e:
            printerr(str(e) + '\n' + e.error)
    cursor.close()

    if found is None:
        return None
    return found.strip("/")
def __process_finished_jobs(self, job_pool, write_cursor, unlocked=False):
    """Collect per-author line counts from finished blame jobs and
    bulk-insert them, advancing the shared id counter."""
    if unlocked:
        job = job_pool.get_next_done_unlocked()
    else:
        job = job_pool.get_next_done()
    rows = []
    while job is not None:
        counts = job.get_authors()
        fid = job.get_file_id()
        cid = job.get_commit_id()
        for offset, name in enumerate(counts.keys()):
            rows.append((self.id_counter + offset, fid, cid,
                         self.authors[name], counts[name]))
        self.id_counter += len(counts)
        if unlocked:
            job = job_pool.get_next_done_unlocked()
        else:
            job = job_pool.get_next_done(0.5)
    if rows:
        write_cursor.executemany(statement(self.__insert__,
                                           self.db.place_holder), rows)
    del rows
def fetchReleaseRevisions(self):
    """Link every tagged main-branch commit to its release entity."""
    cursor = self.connection.cursor()
    sql = "SELECT scmlog.id, tag_revisions.tag_id " + \
          "FROM scmlog, tag_revisions, actions " + \
          "WHERE scmlog.id = tag_revisions.commit_id " + \
          "AND actions.commit_id = scmlog.id " + \
          "AND actions.branch_id = 1 " + \
          "AND scmlog.repository_id=? " + \
          "ORDER BY tag_revisions.tag_id, scmlog.id"
    cursor.execute(statement(sql, self.db.place_holder),
                   (self.repositoryId, ))
    batch = cursor.fetchmany()
    while batch:
        for commit_id, tag_id in batch:
            entry = ReleaseRevisionEntity()
            entry.setRelease(self.releaseDict.getEntity(tag_id))
            commit = self.revisionDict.getCommit(commit_id)
            # Python 2: dict.values() is a list; take the first action.
            action = commit.getActions().values()[0]
            entry.setRevision(self.revisionDict.getEntity(action.getDbId()))
            self.releaseRevisionDict.add(entry)
        batch = cursor.fetchmany()
    cursor.close()
def run(self, repo, uri, db):
    """Entry point for the FileCount extension: resolve the repository id."""
    # Start the profiler, per every other extension
    profiler_start("Running FileCount extension")

    # Open a connection to the database and get cursors
    self.db = db
    connection = self.db.connect()
    read_cursor = connection.cursor()
    write_cursor = connection.cursor()

    # Try to get the repository and get its ID from the database
    try:
        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        read_cursor.execute(statement(
            "SELECT id from repositories where uri = ?",
            db.place_holder), (repo_uri,))
        repo_id = read_cursor.fetchone()[0]
    except NotImplementedError:
        raise ExtensionRunError(
            "FileCount extension is not supported for %s repos" %
            (repo.get_type()))
    except Exception as e:
        # Py3-compatible except syntax (the file already uses `as` elsewhere).
        raise ExtensionRunError(
            "Error creating repository %s. Exception: %s" %
            (repo.get_uri(), str(e)))
def run(self, repo, uri, db):
    """Entry point for the HunkBlame extension: resolve the repository id."""
    profiler_start("Running HunkBlame extension")
    self.db = db
    cnn = self.db.connect()
    read_cursor = cnn.cursor()
    write_cursor = cnn.cursor()
    try:
        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri
        read_cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      db.place_holder), (repo_uri, ))
        repoid = read_cursor.fetchone()[0]
    except NotImplementedError:
        raise ExtensionRunError(
            "HunkBlame extension is not supported for %s repositories" %
            (repo.get_type()))
    except Exception as e:
        # Py3-compatible except syntax (the file already uses `as` elsewhere).
        raise ExtensionRunError(
            "Error creating repository %s. Exception: %s" %
            (repo.get_uri(), str(e)))
def _do_backout(self, repo, uri, db, backout_statement):
    """Run backout_statement against the repository stored for uri.

    Returns True without touching the data when the repository is not
    in the database (usually meaning the extension was already backed
    out).
    """
    connection = db.connect()
    repo_cursor = connection.cursor()
    repo_uri = get_repo_uri(uri, repo)

    try:
        repo_id = get_repo_id(repo_uri, repo_cursor, db)
    except RepoNotFound:
        # Repository isn't in there, so it's likely already backed out
        printerr("Repository not found, is it in the database?")
        repo_id = None
    finally:
        repo_cursor.close()

    if repo_id is None:
        # Fix: the original returned without closing the connection,
        # leaking it on this early-exit path.
        connection.close()
        return True

    update_cursor = connection.cursor()
    execute_statement(statement(backout_statement, db.place_holder),
                      (repo_id,),
                      update_cursor,
                      db,
                      "Couldn't backout extension",
                      exception=ExtensionBackoutError)
    update_cursor.close()
    connection.commit()
    connection.close()
def __get_patches_for_repository(self, repo_id, cursor):
    """Return the commit ids that already have a patch stored for repo_id."""
    query = "SELECT p.commit_id from patches p, scmlog s " + \
            "WHERE p.commit_id = s.id and repository_id = ?"
    cursor.execute(statement(query, self.db.place_holder), (repo_id,))
    return [row[0] for row in cursor.fetchall()]
def __get_files_for_repository(self, repo_id, cursor):
    """Return the file ids already typed in file_types for repo_id."""
    query = "SELECT ft.file_id from file_types ft, files f " + \
            "WHERE f.id = ft.file_id and f.repository_id = ?"
    cursor.execute(statement(query, self.db.place_holder), (repo_id,))
    return [row[0] for row in cursor.fetchall()]
def get_file_id(self, file_path, commit_id):
    """Ask for the file_id for a given file_path and commit_id.

    Returns None when no matching action exists.
    """
    if config.debug:
        profiler_start("Getting file id for file_path %s and commit_id %d",
                       (file_path, commit_id))

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    # `binary` forces a case-sensitive path comparison (MySQL keyword).
    query = """SELECT file_id from actions
            WHERE binary current_file_path = ?
            AND commit_id = ?
            ORDER BY commit_id DESC LIMIT 1"""
    cursor.execute(statement(query, db.place_holder), (file_path, commit_id))
    # Fix: explicit row check instead of a bare except around the
    # subscript; fetchone() returns None when there is no row, and the
    # bare except also hid unrelated database errors.
    row = cursor.fetchone()
    file_id = row[0] if row is not None else None
    cursor.close()
    cnn.close()

    if config.debug:
        profiler_stop("Getting file id for file_path %s and commit_id %d",
                      (file_path, commit_id), True)
    return file_id
def __buildReleaseRevision(self, fileId, commitId, releaseId, resolvedFiles):
    """Recursively follow file_links from fileId down to leaf files,
    appending a [fileId, commitId, releaseId] entry for each leaf."""
    cursor = self.connection.cursor()
    sql = "SELECT file_id " + \
          "FROM file_links " + \
          "WHERE parent_id=? " + \
          "AND commit_id<=?"
    cursor.execute(statement(sql, self.db.place_holder), (fileId, commitId))
    children = []
    batch = cursor.fetchmany()
    while batch:
        children.extend(row[0] for row in batch)
        batch = cursor.fetchmany()
    cursor.close()

    if not children:
        # Leaf file: nothing links to it, record it as resolved.
        resolvedFiles.append([fileId, commitId, releaseId])
    else:
        for child in children:
            self.__buildReleaseRevision(child, commitId, releaseId,
                                        resolvedFiles)
def fetchReleases(self):
    """Load every tag used on the main branch into the release dict."""
    cursor = self.connection.cursor()
    sql = "SELECT id, name " +\
          "FROM tags " + \
          "WHERE id IN (SELECT DISTINCT tag_revisions.tag_id " + \
          "FROM tag_revisions, scmlog, actions " + \
          "WHERE tag_revisions.commit_id = scmlog.id " + \
          "AND actions.commit_id = scmlog.id " + \
          "AND actions.branch_id = 1 " + \
          "AND scmlog.repository_id=?)"
    cursor.execute(statement(sql, self.db.place_holder),
                   (self.repositoryId, ))
    batch = cursor.fetchmany()
    while batch:
        for tag_id, tag_name in batch:
            release = ReleaseEntity()
            release.setDbId(tag_id)
            release.setName(tag_name)
            self.releaseDict.add(release)
        batch = cursor.fetchmany()
    cursor.close()
def __get_files_for_repository(self, repo_id, cursor):
    """Return the file ids already present in file_types for repo_id."""
    sql = "SELECT ft.file_id from file_types ft, files f " + \
          "WHERE f.id = ft.file_id and f.repository_id = ?"
    cursor.execute(statement(sql, self.db.place_holder), (repo_id,))
    return [result[0] for result in cursor.fetchall()]
def __find_previous_commit(self, file_id, commit_id):
    """Locate the (commit_id, rev) that last touched file_id before commit_id.

    Raises NotValidHunkWarning when commit_id is not a modification or
    replacement of the file, or when no earlier commit exists.
    """
    query = """select a.commit_id, a.action_type, c.rev
            from _action_files_cache a,scmlog c
            where a.commit_id=c.id and a.file_id=?
            order by c.date
            """
    cnn = self.db.connect()
    aux = cnn.cursor()
    aux.execute(statement(query, self.db.place_holder), (file_id, ))
    history = aux.fetchall()
    aux.close()
    cnn.close()

    prev_id = None
    prev_rev = None
    for this_id, action, this_rev in history:
        if this_id != commit_id:
            prev_id, prev_rev = this_id, this_rev
            continue
        # Only modifications ('M') and replacements ('R') can be blamed.
        if action not in ('M', 'R'):
            raise NotValidHunkWarning(
                "Wrong commit to blame: commit type: %s" % action)
        break
    else:
        raise NotValidHunkWarning(
            "No previous commit found for file %d at commit %d" %
            (file_id, commit_id))
    if prev_id is None or prev_rev is None:
        raise NotValidHunkWarning(
            "No previous commit found for file %d at commit %d" %
            (file_id, commit_id))
    return prev_id, prev_rev
def process_finished_jobs(self, job_pool, write_cursor, unlocked=False):
    """Drain finished jobs, batch their insert args, and bulk-insert them.

    Returns the number of successfully processed (non-failed) jobs.
    """
    if unlocked:
        job = job_pool.get_next_done_unlocked()
    else:
        job = job_pool.get_next_done(0.5)
    rows = []
    done = 0
    while job is not None:
        if not job.failed:
            new_rows = self.populate_insert_args(job)
            rows.extend(new_rows)
            self.id_counter += len(new_rows)
            done += 1
        if unlocked:
            job = job_pool.get_next_done_unlocked()
        else:
            job = job_pool.get_next_done(0)
    if rows:
        write_cursor.executemany(
            statement(self.__insert__, self.db.place_holder), rows)
    del rows
    return done
def get_path_from_database(self, file_id, commit_id):
    """Returns the last valid path for a given file_id at commit_id
    (May have been removed afterwords!), or None when no action exists.
    """
    if config.debug:
        profiler_start("Getting full file path for file_id %d and commit_id %d",
                       (file_id, commit_id))

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """SELECT current_file_path from actions
            WHERE file_id=? AND commit_id <= ?
            ORDER BY commit_id DESC LIMIT 1"""
    cursor.execute(statement(query, db.place_holder), (file_id, commit_id))
    # Fix: explicit row check instead of a bare except that also hid
    # unrelated database errors; fetchone() returns None on no match.
    row = cursor.fetchone()
    file_path = row[0] if row is not None else None
    cursor.close()
    cnn.close()

    printdbg("get_path_from_database: Path for file_id %d at commit_id %d: %s",
             (file_id, commit_id, file_path))
    if config.debug:
        # Fix: the start/stop keys differed (stray continuation
        # whitespace), so the profiler entry was never matched.
        profiler_stop("Getting full file path for file_id %d and commit_id %d",
                      (file_id, commit_id), True)
    return file_path
def run(self, repo, uri, db):
    """Entry point for the Blame extension: resolve the repository id."""
    profiler_start("Running Blame extension")
    self.db = db
    cnn = self.db.connect()
    read_cursor = cnn.cursor()
    write_cursor = cnn.cursor()
    blames = []
    try:
        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri
        read_cursor.execute(statement("SELECT id from repositories "
                                      "where uri = ?", db.place_holder),
                            (repo_uri,))
        repoid = read_cursor.fetchone()[0]
    except NotImplementedError:
        raise ExtensionRunError("Blame extension is not supported for "
                                "%s repositories" % (repo.get_type()))
    except Exception as e:
        # Fix: the original used `"a " + "b %s" % (x, y)`; `%` bound only
        # to the second literal, so formatting it with a 2-tuple raised
        # TypeError instead of producing the message. Implicit literal
        # concatenation applies `%` to the whole string.
        raise ExtensionRunError("Error creating repository %s. "
                                "Exception: %s" % (repo.get_uri(), str(e)))
def get_file_id(self, file_path, commit_id):
    """Ask for the file_id for a given file_path and commit_id.

    Returns None when no matching path exists at or before commit_id.
    """
    if config.debug:
        profiler_start("Getting file id for file_path %s and commit_id %d",
                       (file_path, commit_id))

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """SELECT file_id from file_paths
            WHERE file_path = ? AND commit_id <= ?
            ORDER BY commit_id DESC LIMIT 1"""
    cursor.execute(statement(query, db.place_holder), (file_path, commit_id))
    # Fix: explicit row check instead of a bare except around the
    # subscript; the bare except also hid unrelated database errors.
    row = cursor.fetchone()
    file_id = row[0] if row is not None else None
    cursor.close()
    cnn.close()

    if config.debug:
        profiler_stop("Getting file id for file_path %s and commit_id %d",
                      (file_path, commit_id), True)
    return file_id
def get_path_from_database(self, file_id, commit_id):
    """Returns the last valid path for a given file_id at commit_id
    (May have been removed afterwords!), or None when no path exists.
    """
    if config.debug:
        profiler_start("Getting full file path for file_id %d and commit_id %d",
                       (file_id, commit_id))

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """SELECT file_path from file_paths
            WHERE file_id=? AND commit_id <= ?
            ORDER BY commit_id DESC LIMIT 1"""
    cursor.execute(statement(query, db.place_holder), (file_id, commit_id))
    # Fix: explicit row check instead of a bare except that also hid
    # unrelated database errors; fetchone() returns None on no match.
    row = cursor.fetchone()
    file_path = row[0] if row is not None else None
    cursor.close()
    cnn.close()

    printdbg("get_path_from_database: Path for file_id %d at commit_id %d: %s",
             (file_id, commit_id, file_path))
    if config.debug:
        # Fix: the start/stop keys differed (stray continuation
        # whitespace), so the profiler entry was never matched.
        profiler_stop("Getting full file path for file_id %d and commit_id %d",
                      (file_id, commit_id), True)
    return file_path
def run(self, repo, uri, db):
    """Entry point for the PatchLOC extension: resolve the repository id
    and make sure the output table exists."""
    profiler_start("Running PatchLOC extension")

    # Open a connection to the database and get cursors
    self.db = db
    connection = self.db.connect()
    cursor = connection.cursor()

    path = uri_to_filename(uri)
    if path is not None:
        repo_uri = repo.get_uri_for_path(path)
    else:
        repo_uri = uri

    cursor.execute(
        statement("SELECT id from repositories where uri = ?",
                  db.place_holder), (repo_uri, ))
    repo_id = cursor.fetchone()[0]

    try:
        self.__create_table(connection)
    except TableAlreadyExists:
        # Resuming a previous run; the table is already in place.
        pass
    except Exception as e:
        # Py3-compatible except syntax (the file already uses `as` elsewhere).
        raise ExtensionRunError(str(e))
def __process_finished_jobs(self, job_pool, write_cursor, unlocked=False):
    """Gather per-author counts from finished blame jobs and insert
    them in one executemany call."""
    if unlocked:
        job = job_pool.get_next_done_unlocked()
    else:
        job = job_pool.get_next_done()
    pending = []
    while job is not None:
        counts = job.get_authors()
        fid = job.get_file_id()
        cid = job.get_commit_id()
        for offset, name in enumerate(counts.keys()):
            pending.append((self.id_counter + offset, fid, cid,
                            self.authors[name], counts[name]))
        self.id_counter += len(counts)
        if unlocked:
            job = job_pool.get_next_done_unlocked()
        else:
            job = job_pool.get_next_done(0.5)
    if pending:
        write_cursor.executemany(
            statement(self.__insert__, self.db.place_holder), pending)
    del pending
def __find_previous_commit(self, file_id, commit_id):
    """Return (commit_id, rev) of the commit preceding commit_id that
    touched file_id, raising NotValidHunkWarning when none is valid."""
    query = """select a.commit_id, a.action_type, c.rev
            from _action_files_cache a,scmlog c
            where a.commit_id=c.id and a.file_id=?
            order by c.date
            """
    cnn = self.db.connect()
    cur = cnn.cursor()
    cur.execute(statement(query, self.db.place_holder), (file_id,))
    rows = cur.fetchall()
    cur.close()
    cnn.close()

    candidate = (None, None)
    for row_commit, action, row_rev in rows:
        if row_commit == commit_id:
            # Nothing to blame for other types
            if action != 'M' and action != 'R':
                raise NotValidHunkWarning(
                    "Wrong commit to blame: commit type: %s" % action)
            break
        candidate = (row_commit, row_rev)
    else:
        raise NotValidHunkWarning(
            "No previous commit found for file %d at commit %d" %
            (file_id, commit_id))

    if candidate[0] is None or candidate[1] is None:
        raise NotValidHunkWarning(
            "No previous commit found for file %d at commit %d" %
            (file_id, commit_id))
    return candidate
def __get_commits_lines_for_repository(self, repo_id, cursor):
    """Return commit ids already present in commits_lines for repo_id."""
    query = "SELECT cm.commit_id from commits_lines cm, scmlog s " + \
            "WHERE cm.commit_id = s.id and repository_id = ?"
    cursor.execute(statement(query, self.db.place_holder), (repo_id,))
    return [row[0] for row in cursor.fetchall()]
def process_finished_jobs(self, job_pool, write_cursor, unlocked=False):
    """Collect insert arguments from every finished, non-failed job and
    flush them with a single executemany.

    Returns how many jobs were processed.
    """
    if unlocked:
        job = job_pool.get_next_done_unlocked()
    else:
        job = job_pool.get_next_done(0.5)
    pending = []
    count = 0
    while job is not None:
        if not job.failed:
            extra = self.populate_insert_args(job)
            pending.extend(extra)
            self.id_counter += len(extra)
            count += 1
        if unlocked:
            job = job_pool.get_next_done_unlocked()
        else:
            job = job_pool.get_next_done(0)
    if len(pending) > 0:
        write_cursor.executemany(statement(self.__insert__,
                                           self.db.place_holder), pending)
    del pending
    return count
def __get_hunk_blames(self, cursor, repoid):
    """Return ids of hunks that already have blame rows for repoid."""
    query = """select distinct b.hunk_id from hunk_blames b
            join hunks h on b.hunk_id=h.id
            join files f on h.file_id=f.id
            where f.repository_id=?"""
    cursor.execute(statement(query, self.db.place_holder), (repoid,))
    return [row[0] for row in cursor.fetchall()]
def __get_hunk_blames(self, cursor, repoid):
    """List the already-blamed hunk ids belonging to repoid."""
    sql = """select distinct b.hunk_id from hunk_blames b
            join hunks h on b.hunk_id=h.id
            join files f on h.file_id=f.id
            where f.repository_id=?"""
    cursor.execute(statement(sql, self.db.place_holder), (repoid, ))
    return [record[0] for record in cursor.fetchall()]
def get_max_id(self, db):
    """Return the largest id stored in the blame table (None if empty)."""
    cnn = self.db.connect()
    cursor = cnn.cursor()
    cursor.execute(statement("SELECT max(id) from blame", db.place_holder))
    highest = cursor.fetchone()[0]
    cursor.close()
    cnn.close()
    return highest
def getRepositoryType(db, cnn, repositoryID):
    """Look up the `type` column of the repository with id repositoryID."""
    cursor = cnn.cursor()
    cursor.execute(
        statement("SELECT type FROM repositories WHERE id=?",
                  db.place_holder), (repositoryID, ))
    kind = cursor.fetchone()[0]
    cursor.close()
    return kind
def __get_path_from_db(self, file_id, commit_id):
    """Fetch the stored path for (file_id, commit_id), rooted at "/"."""
    cursor = self.cnn.cursor()
    cursor.execute(statement(self.__path_query__, self.db.place_holder),
                   (file_id, commit_id))
    stored = cursor.fetchone()[0]
    cursor.close()
    return "/" + stored
def __get_path_from_db(self, file_id, commit_id):
    """Run the configured path query and return the result with a
    leading slash."""
    cursor = self.cnn.cursor()
    cursor.execute(statement(self.__path_query__, self.db.place_holder),
                   (file_id, commit_id))
    relative = cursor.fetchone()[0]
    cursor.close()
    return "/" + relative
def __init__(self, db, cnn, cursor, repoid):
    """Run the configured query through an interval cursor and position
    the iterator before the first row."""
    self.db = db
    self.cnn = cnn
    self.repoid = repoid
    # Iteration state: no previous commit seen, no current row yet.
    self.prev_commit = -1
    self.current = None
    self.icursor = ICursor(cursor, self.INTERVAL_SIZE)
    self.icursor.execute(statement(self.__query__, db.place_holder),
                         (repoid,))
    self.rs = iter(self.icursor.fetchmany())
def __init__(self, db, cnn, cursor, repoid):
    """Prime the interval cursor with this repository's rows and reset
    the iteration state."""
    self.db = db
    self.cnn = cnn
    self.repoid = repoid
    # No commit processed yet and no row selected.
    self.prev_commit = -1
    self.current = None
    self.icursor = ICursor(cursor, self.INTERVAL_SIZE)
    self.icursor.execute(statement(self.__query__, db.place_holder),
                         (repoid, ))
    self.rs = iter(self.icursor.fetchmany())
def findRepository(db, cnn, projectName):
    """Return the id of the repository named projectName.

    Raises ProjectNotFound when no such repository exists.
    """
    cursor = cnn.cursor()
    cursor.execute(
        statement("SELECT id FROM repositories WHERE name=?",
                  db.place_holder), (projectName, ))
    # Fix: the original subscripted fetchone() directly, so a missing
    # project raised TypeError (None[0]) and the ProjectNotFound branch
    # was unreachable. Check the row before subscripting.
    row = cursor.fetchone()
    cursor.close()
    if row is None:
        raise ProjectNotFound
    return row[0]
def get_patches(self, repo, repo_uri, repo_id, db, cursor):
    """Yield (commit_id, file_id, utf8 patch, rev) for each stored,
    non-NULL patch of repo_id."""
    icursor = ICursor(cursor, self.INTERVAL_SIZE)
    # Get the patches from this repository
    query = """select p.commit_id, p.file_id, p.patch, s.rev
            from patches p, scmlog s
            where p.commit_id = s.id and
            s.repository_id = ? and
            p.patch is not NULL"""
    icursor.execute(statement(query, db.place_holder), (repo_id, ))
    batch = icursor.fetchmany()
    while batch:
        for commit_id, file_id, patch, rev in batch:
            yield (commit_id, file_id, to_utf8(patch), rev)
        batch = icursor.fetchmany()
def get_patches(self, repo, repo_uri, repo_id, db, cursor):
    """Generator over the stored patches of repo_id, yielding
    (commit_id, file_id, utf8 patch text, revision) tuples."""
    icursor = ICursor(cursor, self.INTERVAL_SIZE)
    # Only rows with an actual patch body are of interest.
    sql = """select p.commit_id, p.file_id, p.patch, s.rev
            from patches p, scmlog s
            where p.commit_id = s.id and
            s.repository_id = ? and
            p.patch is not NULL"""
    icursor.execute(statement(sql, db.place_holder), (repo_id,))
    chunk = icursor.fetchmany()
    while chunk:
        for commit_id, file_id, blob, rev in chunk:
            yield (commit_id, file_id, to_utf8(blob), rev)
        chunk = icursor.fetchmany()
def update_all(self, repo_id):
    """
    update_all enable cache for adjacency matrices
    Pros: File paths in different revisions can be
    accessed randomly, i.e. after calling update_all,
    get_path can be called with any revision in any
    order.
    Cons: It consumes significant memory to store
    the adjacency matrices

    If the config has low_memory set to true, shelve will
    be used instead, to write the cache out to disk.
    """
    profiler_start("Update all file paths")
    if Config().low_memory:
        self.shelve_file_name = str(time()) + "-shelve.db"
        # shelve can refuse to open over stale data, remove leftovers.
        if os.path.exists(self.shelve_file_name):
            os.remove(self.shelve_file_name)
        self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name,
                                                  writeback=False)

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """select distinct(s.id) from scmlog s, actions a
            where s.id = a.commit_id and repository_id=?
            order by s.date"""
    cursor.execute(statement(query, db.place_holder), (repo_id,))
    previous = -1
    for commit in [row[0] for row in cursor.fetchall()]:
        if previous == commit:
            continue
        # Only compute the adjacency matrix once per commit.
        if self.__dict__['cached_adj'].get(str(commit)) is None:
            self.update_for_revision(cursor, commit, repo_id)
            self.__dict__['cached_adj'][str(commit)] = \
                deepcopy(self.__dict__['adj'])
        previous = commit
    cursor.close()
    cnn.close()
    profiler_stop("Update all file paths", delete=True)
def update_all(self, repo_id):
    """
    update_all enable cache for adjacency matrices
    Pros: File paths in different revisions can be
    accessed randomly, i.e. after calling update_all,
    get_path can be called with any revision in any
    order.
    Cons: It consumes significant memory to store
    the adjacency matrices

    If the config has low_memory set to true, shelve will
    be used instead, to write the cache out to disk.
    """
    profiler_start("Update all file paths")
    if Config().low_memory:
        self.shelve_file_name = str(time()) + "-shelve.db"
        # If there is an old file, shelf will complain viciously
        if os.path.exists(self.shelve_file_name):
            os.remove(self.shelve_file_name)
        self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name,
                                                  writeback=False)

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    sql = """select distinct(s.id) from scmlog s, actions a
            where s.id = a.commit_id and repository_id=?
            order by s.commit_date"""
    cursor.execute(statement(sql, db.place_holder), (repo_id,))
    last_seen = -1
    for commit_id in [row[0] for row in cursor.fetchall()]:
        if last_seen == commit_id:
            continue
        # Skip commits whose adjacency matrix is already cached.
        if self.__dict__['cached_adj'].get(str(commit_id)) is None:
            self.update_for_revision(cursor, commit_id, repo_id)
            self.__dict__['cached_adj'][str(commit_id)] = \
                deepcopy(self.__dict__['adj'])
        last_seen = commit_id
    cursor.close()
    cnn.close()
    profiler_stop("Update all file paths", delete=True)
def populate_insert_args(self, job):
    """Map each hunk's revision to its scmlog id and build the insert
    tuples; hunks whose revision is unknown are skipped."""
    cnn = self.db.connect()
    cursor = cnn.cursor()
    rows = []
    lookup = "select id from scmlog where rev = ?"
    for hunk in job.hunks:
        cursor.execute(statement(lookup, self.db.place_holder),
                       (hunk.rev, ))
        found = cursor.fetchone()
        if found is not None:
            rows.append((job.file_id, job.commit_id, hunk.start,
                         hunk.end, found[0]))
    cursor.close()
    cnn.close()
    return rows
def populate_insert_args(self, job):
    """Build (file_id, commit_id, start, end, scmlog_id) tuples for the
    job's hunks, resolving each hunk revision via scmlog."""
    cnn = self.db.connect()
    cursor = cnn.cursor()
    out = []
    for hunk in job.hunks:
        cursor.execute(
            statement("select id from scmlog where rev = ?",
                      self.db.place_holder), (hunk.rev,))
        match = cursor.fetchone()
        # Unknown revisions produce no row and are silently skipped.
        if match is not None:
            out.append((job.file_id, job.commit_id, hunk.start, hunk.end,
                        match[0]))
    cursor.close()
    cnn.close()
    return out
def fetchReleaseRevisions(self):
    """Resolve copied files down to their sources and attach the last
    revision prior to the copy to the corresponding release."""
    cursor = self.connection.cursor()
    sql = "SELECT file_copies.from_id, file_copies.from_commit_id, tag_revisions.tag_id " + \
          "FROM tag_revisions, scmlog, actions, file_copies " + \
          "WHERE scmlog.id = tag_revisions.commit_id " + \
          "AND actions.commit_id = scmlog.id " + \
          "AND file_copies.action_id = actions.id " + \
          "AND actions.branch_id = 1 " + \
          "AND actions.type = 'C' " + \
          "AND scmlog.repository_id=?"
    cursor.execute(statement(sql, self.db.place_holder),
                   (self.repositoryId, ))
    copies = []
    batch = cursor.fetchmany()
    while batch:
        for src_id, src_commit, tag_id in batch:
            copies.append([src_id, src_commit, tag_id])
        batch = cursor.fetchmany()
    cursor.close()

    # Recursively expand every copy into its leaf source files.
    resolved = []
    for src_id, src_commit, tag_id in copies:
        self.__buildReleaseRevision(src_id, src_commit, tag_id, resolved)

    for fileId, commitId, releaseId in resolved:
        sourceFile = self.filesDict.getEntity(fileId, commitId)
        revision = sourceFile.getLastRevisionPriorToCommit(commitId)
        if revision is not None:
            entry = ReleaseRevisionEntity()
            entry.setRelease(self.releaseDict.getEntity(releaseId))
            entry.setRevision(revision)
            self.releaseRevisionDict.add(entry)
def __process_finished_jobs(self, job_pool, write_cursor, db):
    """Drain completed patch jobs and insert each into the patches table."""
    # commit_id below is the commit ID. For some reason, the
    # documentation advocates tablename_id as the reference,
    # but in the source, these are referred to as commit IDs.
    # Don't ask me why!
    done = job_pool.get_next_done()
    while done is not None:
        record = DBPatch(None, done.commit_id, done.data)
        execute_statement(statement(DBPatch.__insert__,
                                    self.db.place_holder),
                          (record.id, record.commit_id,
                           to_utf8(record.patch).decode("utf-8")),
                          write_cursor,
                          db,
                          "Couldn't insert, duplicate patch?",
                          exception=ExtensionRunError)
        done = job_pool.get_next_done(0)
def __process_finished_jobs(self, job_pool, connection, db):
    """Insert the results of finished content jobs, tolerating
    duplicate rows on MySQL.

    Returns the number of jobs handled.
    """
    # Pick the driver-specific IntegrityError so duplicate-key errors
    # can be told apart from other failures.
    if isinstance(self.db, SqliteDatabase):
        from sqlite3 import IntegrityError
    elif isinstance(self.db, MysqlDatabase):
        from MySQLdb import IntegrityError

    write_cursor = connection.cursor()
    job = job_pool.get_next_done(0)
    handled = 0

    # commit_id is the commit ID. For some reason, the
    # documentation advocates tablename_id as the reference,
    # but in the source, these are referred to as commit IDs.
    # Don't ask me why!
    while job is not None:
        query = """
            insert into content(commit_id, file_id, content, loc, size)
            values(?,?,?,?,?)"""
        row = (
            job.commit_id,
            job.file_id,
            job.file_contents,
            job.file_number_of_lines,
            job.file_size,
        )
        try:
            write_cursor.execute(statement(query, db.place_holder), row)
        except IntegrityError as e:
            # MySQL error 1062 is "duplicate entry"; anything else is fatal.
            if isinstance(self.db, MysqlDatabase) and e.args[0] == 1062:
                pass
            else:
                printerr(
                    "Error while inserting content for file %d @ commit %d"
                    % (job.file_id, job.commit_id)
                )
                raise
        handled += 1
        job = job_pool.get_next_done(0)

    connection.commit()
    write_cursor.close()
    return handled
def populate_insert_args(self, job):
    """Resolve each bug-introducing revision to its scmlog id, returning
    (hunk_id, scmlog_id) tuples; unknown revisions are skipped."""
    bug_revs = job.get_bug_revs()
    cnn = self.db.connect()
    cursor = cnn.cursor()
    rows = []
    for hunk_id, revs in bug_revs.items():
        for rev in revs:
            printdbg("Find id for rev %s" % rev)
            cursor.execute(
                statement("select id from scmlog where rev = ?",
                          self.db.place_holder), (rev, ))
            found = cursor.fetchone()
            if found is not None:
                rows.append((hunk_id, found[0]))
    cursor.close()
    cnn.close()
    return rows
def update_all(self, repo_id):
    """Replay the file path updates for every commit of repo_id, in id
    order, without caching adjacency matrices."""
    profiler_start("Update all file paths")
    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """select distinct(s.id) from scmlog s, actions a
            where s.id = a.commit_id and repository_id=?
            order by s.id"""
    cursor.execute(statement(query, db.place_holder), (repo_id,))
    seen = -1
    for commit in [row[0] for row in cursor.fetchall()]:
        if commit != seen:
            self.update_for_revision(cursor, commit, repo_id)
            seen = commit
    cursor.close()
    cnn.close()
    profiler_stop("Update all file paths", delete=True)
def __process_finished_jobs(self, job_pool, write_cursor, db):
    """Write each finished job's ls line count back into scmlog.

    Returns the number of rows updated.
    """
    handled = 0
    job = job_pool.get_next_done(0)
    while job is not None:
        update = statement("""update scmlog set file_count = ?
                           where id = ?""", db.place_holder)
        execute_statement(update,
                          (job.ls_line_count, job.row_id),
                          write_cursor, db,
                          "Couldn't update scmlog with ls line count",
                          exception=ExtensionRunError)
        handled += 1
        job = job_pool.get_next_done(0)
    return handled
def patch_generator(repo, repo_uri, repo_id, db, cursor):
    """Yield (commit_id, patch data, rev) for every commit of repo_id.

    NOTE(review): this function references `self` without taking it as
    a parameter — presumably it is defined inside a method so `self`
    is captured from the enclosing scope; confirm against the
    surrounding code.
    """
    icursor = ICursor(cursor, self.INTERVAL_SIZE)
    icursor.execute(
        statement("SELECT id, rev, composed_rev " +
                  "from scmlog where repository_id = ?",
                  db.place_holder),
        (repo_id,),
    )
    batch = icursor.fetchmany()
    while batch:
        for commit_id, revision, composed_rev in batch:
            # Get the patch
            job = PatchJob(revision, commit_id)
            local_path = uri_to_filename(repo_uri)
            job.run(repo, local_path or repo.get_uri())
            # Yield the patch to hunks
            yield (job.commit_id, job.data, job.rev)
        batch = icursor.fetchmany()
def run(self, repo, uri, db):
    """Entry point for the PatchLOC extension: resolve the repository
    id and ensure the output table exists."""
    profiler_start("Running PatchLOC extension")

    # Open a connection to the database and get cursors
    self.db = db
    connection = self.db.connect()
    cursor = connection.cursor()

    path = uri_to_filename(uri)
    if path is not None:
        repo_uri = repo.get_uri_for_path(path)
    else:
        repo_uri = uri

    cursor.execute(statement("SELECT id from repositories where uri = ?",
                             db.place_holder), (repo_uri,))
    repo_id = cursor.fetchone()[0]

    try:
        self.__create_table(connection)
    except TableAlreadyExists:
        # Resuming a previous run; the table is already in place.
        pass
    except Exception as e:
        # Py3-compatible except syntax (the file already uses `as` elsewhere).
        raise ExtensionRunError(str(e))
def __get_authors(self, cursor):
    """Cache the people table on self.authors as a name -> id mapping."""
    cursor.execute(statement("select id, name from people",
                             self.db.place_holder))
    self.authors = dict((name, person_id)
                        for person_id, name in cursor.fetchall())
def __get_blames(self, cursor, repoid):
    """Return the (file_id, commit_id) pairs already blamed for repoid."""
    query = "select b.file_id, b.commit_id from blame b, files f " + \
            "where b.file_id = f.id and repository_id = ?"
    cursor.execute(statement(query, self.db.place_holder), (repoid,))
    return [(row[0], row[1]) for row in cursor.fetchall()]