def run(self, repo, uri, db): profiler_start("Running Blame extension") self.db = db cnn = self.db.connect() read_cursor = cnn.cursor() write_cursor = cnn.cursor() blames = [] try: path = uri_to_filename(uri) if path is not None: repo_uri = repo.get_uri_for_path(path) else: repo_uri = uri read_cursor.execute(statement("SELECT id from repositories " + \ "where uri = ?", db.place_holder), (repo_uri,)) repoid = read_cursor.fetchone()[0] except NotImplementedError: raise ExtensionRunError("Blame extension is not supported for " + \ "%s repositories" % (repo.get_type())) except Exception, e: raise ExtensionRunError("Error creating repository %s. " + \ "Exception: %s" % (repo.get_uri(), str(e)))
def run(self, repo, uri, db): # Start the profiler, per every other extension profiler_start("Running FileCount extension") # Open a connection to the database and get cursors self.db = db connection = self.db.connect() read_cursor = connection.cursor() write_cursor = connection.cursor() # Try to get the repository and get its ID from the database try: path = uri_to_filename(uri) if path is not None: repo_uri = repo.get_uri_for_path(path) else: repo_uri = uri read_cursor.execute(statement( \ "SELECT id from repositories where uri = ?", \ db.place_holder), (repo_uri,)) repo_id = read_cursor.fetchone()[0] except NotImplementedError: raise ExtensionRunError( \ "FileCount extension is not supported for %s repos" % \ (repo.get_type())) except Exception, e: raise ExtensionRunError( \ "Error creating repository %s. Exception: %s" % \ (repo.get_uri(), str(e)))
def run(self, repo, uri, db):
    def patch_generator(repo, repo_uri, repo_id, db, cursor):
        icursor = ICursor(cursor, self.INTERVAL_SIZE)
        icursor.execute(
            statement("SELECT id, rev, composed_rev " +
                      "from scmlog where repository_id = ?",
                      db.place_holder),
            (repo_id,),
        )

        rs = icursor.fetchmany()

        while rs:
            for commit_id, revision, composed_rev in rs:
                # Get the patch
                pj = PatchJob(revision, commit_id)

                path = uri_to_filename(repo_uri)
                pj.run(repo, path or repo.get_uri())

                # Yield the patch to hunks
                yield (pj.commit_id, pj.data, pj.rev)

            rs = icursor.fetchmany()

    profiler_start("Running PatchesAndHunks extension")
    hunks = Hunks()
    hunks.get_patches = patch_generator
    hunks.run(repo, uri, db)

def run(self, repo, uri, db):
    def patch_generator(repo, repo_uri, repo_id, db, cursor):
        icursor = ICursor(cursor, self.INTERVAL_SIZE)
        icursor.execute(statement("SELECT id, rev, composed_rev " + \
                                  "from scmlog where repository_id = ?",
                                  db.place_holder), (repo_id,))

        rs = icursor.fetchmany()

        while rs:
            for commit_id, revision, composed_rev in rs:
                # Get the patch
                pj = PatchJob(revision, commit_id)

                path = uri_to_filename(repo_uri)
                pj.run(repo, path or repo.get_uri())

                p = DBPatch(db, commit_id, pj.data)

                # Yield the patch to hunks
                for file_id, patch in p.file_patches():
                    yield (pj.commit_id, file_id, patch, pj.rev)

            rs = icursor.fetchmany()

    profiler_start("Running PatchesAndHunks extension")
    hunks = Hunks()
    hunks.get_patches = patch_generator
    hunks.run(repo, uri, db)

def get_path_from_database(self, file_id, commit_id):
    """Returns the last valid path for a given file_id at commit_id
    (it may have been removed afterwards!)"""
    if config.debug:
        profiler_start("Getting full file path for file_id %d and "
                       "commit_id %d", (file_id, commit_id))

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """SELECT current_file_path from actions
               WHERE file_id = ? AND commit_id <= ?
               ORDER BY commit_id DESC LIMIT 1"""
    cursor.execute(statement(query, db.place_holder), (file_id, commit_id))

    try:
        file_path = cursor.fetchone()[0]
    except:
        file_path = None

    cursor.close()
    cnn.close()

    printdbg("get_path_from_database: "
             "path for file_id %d at commit_id %d: %s",
             (file_id, commit_id, file_path))

    if config.debug:
        profiler_stop("Getting full file path for file_id %d and "
                      "commit_id %d", (file_id, commit_id), True)

    return file_path

def get_path_from_database(self, file_id, commit_id):
    """Returns the last valid path for a given file_id at commit_id
    (it may have been removed afterwards!)"""
    if config.debug:
        profiler_start("Getting full file path for file_id %d and "
                       "commit_id %d", (file_id, commit_id))

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """SELECT file_path from file_paths
               WHERE file_id = ? AND commit_id <= ?
               ORDER BY commit_id DESC LIMIT 1"""
    cursor.execute(statement(query, db.place_holder), (file_id, commit_id))

    try:
        file_path = cursor.fetchone()[0]
    except:
        file_path = None

    cursor.close()
    cnn.close()

    printdbg("get_path_from_database: "
             "path for file_id %d at commit_id %d: %s",
             (file_id, commit_id, file_path))

    if config.debug:
        profiler_stop("Getting full file path for file_id %d and "
                      "commit_id %d", (file_id, commit_id), True)

    return file_path

def get_file_id(self, file_path, commit_id):
    """Ask for the file_id for a given file_path and commit_id"""
    if config.debug:
        profiler_start("Getting file id for file_path %s and commit_id %d",
                       (file_path, commit_id))

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """SELECT file_id from actions
               WHERE binary current_file_path = ? AND commit_id = ?
               ORDER BY commit_id DESC LIMIT 1"""
    cursor.execute(statement(query, db.place_holder), (file_path, commit_id))

    try:
        file_id = cursor.fetchone()[0]
    except:
        file_id = None

    cursor.close()
    cnn.close()

    if config.debug:
        profiler_stop("Getting file id for file_path %s and commit_id %d",
                      (file_path, commit_id), True)

    return file_id

def run(self, repo, repo_uri):
    profiler_start("Running BlameJob for %s@%s", (self.path, self.rev))

    def blame_line(line, p):
        p.feed(line)

    repo_type = repo.get_type()
    if repo_type == 'cvs':
        # CVS paths contain the module stuff
        uri = repo.get_uri_for_path(repo_uri)
        module = uri[len(repo.get_uri()):].strip('/')

        if module != '.':
            path = self.path[len(module):].strip('/')
        else:
            path = self.path.strip('/')
    else:
        path = self.path.strip('/')

    filename = os.path.basename(self.path)
    p = create_parser(repo.get_type(), self.path)
    out = self.get_content_handler()
    p.set_output_device(out)
    wid = repo.add_watch(BLAME, blame_line, p)

    try:
        repo.blame(os.path.join(repo_uri, path), self.rev)
        self.collect_results(out)
    except RepositoryCommandError, e:
        self.failed = True
        printerr("Command %s returned %d (%s)",
                 (e.cmd, e.returncode, e.error))

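# A brief hand-traced sketch of the CVS module stripping above. All of the
# values are made up for illustration; only the arithmetic mirrors the code.
#
#   repo.get_uri()                  -> ":pserver:anon@cvs.example.org:/cvsroot"
#   repo.get_uri_for_path(repo_uri) -> ":pserver:anon@cvs.example.org:/cvsroot/mymodule"
#   module                          -> "mymodule"
#   self.path                       -> "mymodule/src/foo.c"
#   path                            -> "src/foo.c"
#
# so repo.blame() ends up being called on os.path.join(repo_uri, "src/foo.c").
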
def run(self, repo, uri, db): profiler_start("Running HunkBlame extension") self.db = db cnn = self.db.connect() read_cursor = cnn.cursor() write_cursor = cnn.cursor() try: path = uri_to_filename(uri) if path is not None: repo_uri = repo.get_uri_for_path(path) else: repo_uri = uri read_cursor.execute( statement("SELECT id from repositories where uri = ?", db.place_holder), (repo_uri, )) repoid = read_cursor.fetchone()[0] except NotImplementedError: raise ExtensionRunError( "HunkBlame extension is not supported for %s repositories" % (repo.get_type())) except Exception, e: raise ExtensionRunError( "Error creating repository %s. Exception: %s" % (repo.get_uri(), str(e)))
def get_file_id(self, file_path, commit_id):
    """Ask for the file_id for a given file_path and commit_id"""
    if config.debug:
        profiler_start("Getting file id for file_path %s and commit_id %d",
                       (file_path, commit_id))

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """SELECT file_id from file_paths
               WHERE file_path = ? AND commit_id <= ?
               ORDER BY commit_id DESC LIMIT 1"""
    cursor.execute(statement(query, db.place_holder), (file_path, commit_id))

    try:
        file_id = cursor.fetchone()[0]
    except:
        file_id = None

    cursor.close()
    cnn.close()

    if config.debug:
        profiler_stop("Getting file id for file_path %s and commit_id %d",
                      (file_path, commit_id), True)

    return file_id

def run(self, repo, uri, db): profiler_start("Running PatchLOC extension") # Open a connection to the database and get cursors self.db = db connection = self.db.connect() cursor = connection.cursor() path = uri_to_filename(uri) if path is not None: repo_uri = repo.get_uri_for_path(path) else: repo_uri = uri cursor.execute( statement("SELECT id from repositories where uri = ?", db.place_holder), (repo_uri, )) repo_id = cursor.fetchone()[0] try: self.__create_table(connection) except TableAlreadyExists: pass except Exception, e: raise ExtensionRunError(str(e))
def line(self, blame_line):
    if not self.profiled:
        profiler_start("Processing blame output for %s", (self.filename))
        self.profiled = True

    for hunk_id, start_line, end_line in self.hunks:
        if blame_line.line >= start_line and blame_line.line <= end_line:
            if self.bug_revs.get(hunk_id) is None:
                self.bug_revs[hunk_id] = set()

            self.bug_revs[hunk_id].add(blame_line.rev)
            break

def get_path(self, file_id, commit_id, repo_id):
    profiler_start("Getting path for file %d at commit %d",
                   (file_id, commit_id))
    adj = self.__dict__['adj']
    assert adj is not None, "Matrix not updated"
    path = self.__build_path(file_id, adj)
    profiler_stop("Getting path for file %d at commit %d",
                  (file_id, commit_id), True)

    return path

def line(self, blame_line):
    if not self.profiled:
        profiler_start("Processing blame output for %s", (self.filename))
        self.profiled = True

    for hunk_id, start_line, end_line in self.hunks:
        if blame_line.line >= start_line and blame_line.line <= end_line:
            if self.bug_revs.get(hunk_id) is None:
                self.bug_revs[hunk_id] = set()

            self.bug_revs[hunk_id].add(blame_line.rev)
            break

def update_all(self, repo_id):
    """
    update_all enables the cache for adjacency matrices.

    Pros: File paths in different revisions can be accessed
    randomly, i.e. after calling update_all, get_path can be
    called with any revision in any order.

    Cons: It consumes significant memory to store the adjacency
    matrices.

    If the config has low_memory set to true, shelve will be
    used instead, to write the cache out to disk.
    """
    profiler_start("Update all file paths")

    if Config().low_memory:
        self.shelve_file_name = str(time()) + "-shelve.db"

        # If there is an old file, shelve will complain viciously
        if os.path.exists(self.shelve_file_name):
            os.remove(self.shelve_file_name)

        self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name,
                                                  writeback=False)

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """select distinct(s.id) from scmlog s, actions a
               where s.id = a.commit_id and repository_id = ?
               order by s.commit_date"""
    cursor.execute(statement(query, db.place_holder), (repo_id,))

    old_id = -1
    all_commits = [i[0] for i in cursor.fetchall()]
    for id in all_commits:
        if old_id != id:
            adj = self.__dict__['cached_adj'].get(str(id))

            if adj is None:
                self.update_for_revision(cursor, id, repo_id)
                self.__dict__['cached_adj'][str(id)] = \
                    deepcopy(self.__dict__['adj'])
            old_id = id

    cursor.close()
    cnn.close()
    profiler_stop("Update all file paths", delete=True)

def update_all(self, repo_id):
    """
    update_all enables the cache for adjacency matrices.

    Pros: File paths in different revisions can be accessed
    randomly, i.e. after calling update_all, get_path can be
    called with any revision in any order.

    Cons: It consumes significant memory to store the adjacency
    matrices.

    If the config has low_memory set to true, shelve will be
    used instead, to write the cache out to disk.
    """
    profiler_start("Update all file paths")

    if Config().low_memory:
        self.shelve_file_name = str(time()) + "-shelve.db"

        # If there is an old file, shelve will complain viciously
        if os.path.exists(self.shelve_file_name):
            os.remove(self.shelve_file_name)

        self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name,
                                                  writeback=False)

    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """select distinct(s.id) from scmlog s, actions a
               where s.id = a.commit_id and repository_id = ?
               order by s.date"""
    cursor.execute(statement(query, db.place_holder), (repo_id,))

    old_id = -1
    all_commits = [i[0] for i in cursor.fetchall()]
    for id in all_commits:
        if old_id != id:
            adj = self.__dict__['cached_adj'].get(str(id))

            if adj is None:
                self.update_for_revision(cursor, id, repo_id)
                self.__dict__['cached_adj'][str(id)] = \
                    deepcopy(self.__dict__['adj'])
            old_id = id

    cursor.close()
    cnn.close()
    profiler_stop("Update all file paths", delete=True)

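# A minimal usage sketch of the caching flow described in the docstring above.
# It assumes `db` is an already-configured CVSAnalY database object (the same
# kind the extensions' run() methods receive) holding a parsed repository with
# id 1; the file_id and commit_id values are placeholders.
fp = FilePaths(db)
fp.update_all(1)    # cache one adjacency matrix per revision
                    # (written out to a shelve file when low_memory is set)

# Per the docstring, revisions can now be visited in any order:
print fp.get_path(file_id=42, commit_id=100, repo_id=1)
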
def __build_path(self, file_id, adj):
    if file_id not in adj.adj:
        return None

    profiler_start("Building path for file %d", (file_id,))
    tokens = []
    id = file_id
    while id != -1:
        tokens.insert(0, adj.files[id])
        id = adj.adj.get(id, -1)
    profiler_stop("Building path for file %d", (file_id,), True)

    return "/" + "/".join(tokens)

def __build_path(self, file_id, adj):
    if file_id not in adj.adj:
        return None

    profiler_start("Building path for file %d", (file_id,))
    tokens = []
    id = file_id
    while id != -1:
        tokens.insert(0, adj.files[id])
        id = adj.adj[id]
    profiler_stop("Building path for file %d", (file_id,), True)

    return "/" + "/".join(tokens)

def __build_path(self, file_id, adj):
    if file_id not in adj.adj:
        return None

    profiler_start("Building path for file %d", (file_id,))
    tokens = []
    id = file_id
    while id is not None and id != -1:
        tokens.insert(0, adj.files[id])
        # use get instead of indexing to avoid a KeyError
        id = adj.adj.get(id)
    profiler_stop("Building path for file %d", (file_id,), True)

    return "/" + "/".join(tokens)

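# A self-contained sketch of the parent-pointer walk that the __build_path
# variants above implement. The Adj stand-in and the sample ids are
# hypothetical; in the real extension the matrices come from update_for_revision.
class _AdjSketch(object):
    def __init__(self):
        self.files = {}    # file_id -> file name
        self.adj = {}      # file_id -> parent file_id (-1 or missing = root)

def _build_path_sketch(file_id, adj):
    if file_id not in adj.adj:
        return None
    tokens = []
    id = file_id
    while id is not None and id != -1:
        tokens.insert(0, adj.files[id])
        id = adj.adj.get(id)    # use get to stop cleanly past the root
    return "/" + "/".join(tokens)

adj = _AdjSketch()
adj.files = {1: "src", 2: "main.py"}
adj.adj = {1: -1, 2: 1}             # "main.py" hangs off "src", which is the root
print _build_path_sketch(2, adj)    # -> /src/main.py
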
def get_patches(self, repo, repo_uri, repo_id, db, cursor):
    profiler_start("Hunks: fetch all patches")
    icursor = ICursor(cursor, self.INTERVAL_SIZE)
    # Get the patches from this repository
    query = """select p.commit_id, p.file_id, p.patch, s.rev
               from patches p, scmlog s
               where p.commit_id = s.id and
                     s.repository_id = ? and
                     p.patch is not NULL"""
    icursor.execute(statement(query, db.place_holder), (repo_id,))
    profiler_stop("Hunks: fetch all patches", delete=True)

    rs = icursor.fetchmany()
    while rs:
        for commit_id, file_id, patch_content, rev in rs:
            yield (commit_id, file_id, to_utf8(patch_content), rev)
        rs = icursor.fetchmany()

def update_all(self, repo_id):
    profiler_start("Update all file paths")
    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """select distinct(s.id) from scmlog s, actions a
               where s.id = a.commit_id and repository_id = ?
               order by s.id"""
    cursor.execute(statement(query, db.place_holder), (repo_id,))
    old_id = -1
    all_commits = [i[0] for i in cursor.fetchall()]
    for id in all_commits:
        if old_id != id:
            self.update_for_revision(cursor, id, repo_id)
            old_id = id
    cursor.close()
    cnn.close()
    profiler_stop("Update all file paths", delete=True)

def update_all(self, repo_id):
    profiler_start("Update all file paths")
    db = self.__dict__['db']
    cnn = db.connect()
    cursor = cnn.cursor()
    query = """select distinct(s.id) from scmlog s, actions a
               where s.id = a.commit_id and repository_id = ?
               order by s.id"""
    cursor.execute(statement(query, db.place_holder), (repo_id,))
    old_id = -1
    all_commits = [i[0] for i in cursor.fetchall()]
    for id in all_commits:
        if old_id != id:
            self.update_for_revision(cursor, id, repo_id)
            old_id = id
    cursor.close()
    cnn.close()
    profiler_stop("Update all file paths", delete=True)

def get_patches(self, repo, repo_uri, repo_id, db, cursor):
    profiler_start("Hunks: fetch all patches")
    icursor = ICursor(cursor, self.INTERVAL_SIZE)
    # Get the patches from this repository
    query = """select p.commit_id, p.file_id, p.patch, s.rev
               from patches p, scmlog s
               where p.commit_id = s.id and
                     s.repository_id = ? and
                     p.patch is not NULL"""
    icursor.execute(statement(query, db.place_holder), (repo_id,))
    profiler_stop("Hunks: fetch all patches", delete=True)

    rs = icursor.fetchmany()
    while rs:
        for commit_id, file_id, patch_content, rev in rs:
            yield (commit_id, file_id, to_utf8(patch_content), rev)
        rs = icursor.fetchmany()

def run(self, repo, uri, db): profiler_start("Running Patches extension") self.db = db self.repo = repo path = uri_to_filename(uri) if path is not None: repo_uri = repo.get_uri_for_path(path) else: repo_uri = uri path = uri_to_filename(uri) self.repo_uri = path or repo.get_uri() cnn = self.db.connect() cursor = cnn.cursor() cursor.execute(statement("SELECT id from repositories where uri = ?", db.place_holder), (repo_uri,)) repo_id = cursor.fetchone()[0] # If table does not exist, the list of commits is empty, # otherwise it will be filled within the except block below commits = [] try: printdbg("Creating patches table") self.__create_table(cnn) except TableAlreadyExists: printdbg("Patches table exists already, getting max ID") cursor.execute(statement("SELECT max(id) from patches", db.place_holder)) id = cursor.fetchone()[0] if id is not None: DBPatch.id_counter = id + 1 commits = self.__get_patches_for_repository(repo_id, cursor) except Exception, e: raise ExtensionRunError(str(e))
def run(self, repo, uri, db): # Start the profiler, per every other extension profiler_start("Running BugFixMessage extension") # Open a connection to the database and get cursors self.db = db connection = self.db.connect() read_cursor = connection.cursor() write_cursor = connection.cursor() # Try to get the repository and get its ID from the database try: repo_uri = get_repo_uri(uri, repo) repo_id = get_repo_id(repo_uri, read_cursor, db) except NotImplementedError: raise ExtensionRunError( \ "BugFixMessage extension is not supported for %s repos" % \ (repo.get_type())) except Exception, e: raise ExtensionRunError( \ "Error creating repository %s. Exception: %s" % \ (repo.get_uri(), str(e)))
def run(self, repo, uri, db): profiler_start("Running PatchLOC extension") # Open a connection to the database and get cursors self.db = db connection = self.db.connect() cursor = connection.cursor() path = uri_to_filename(uri) if path is not None: repo_uri = repo.get_uri_for_path(path) else: repo_uri = uri cursor.execute(statement("SELECT id from repositories where uri = ?", db.place_holder), (repo_uri,)) repo_id = cursor.fetchone()[0] try: self.__create_table(connection) except TableAlreadyExists: pass except Exception, e: raise ExtensionRunError(str(e))
i = i + 1 if i >= queuesize: printdbg("Content queue is now at %d, flushing to database", (i, )) processed_jobs = self.__process_finished_jobs( job_pool, write_cursor, db) connection.commit() i = i - processed_jobs if processed_jobs < (queuesize / 5): job_pool.join() job_pool.join() self.__process_finished_jobs(job_pool, write_cursor, db) profiler_start("Inserting results in db") #self.__insert_many(write_cursor) connection.commit() profiler_stop("Inserting results in db") read_cursor.close() write_cursor.close() connection.close() # This turns off the profiler and deletes it's timings profiler_stop("Running content extension", delete=True) def backout(self, repo, uri, db): update_statement = """delete from content where commit_id in (select id from scmlog s where s.repository_id = ?)"""
                        pass
                    finally:
                        inner_cursor.close()

                hunks = [h for h in hunks if h[0] not in blames]
                job = HunkBlameJob(hunks, relative_path, pre_rev)

                job_pool.push(job)
                n_blames += 1

                if n_blames >= self.MAX_BLAMES:
                    processed_jobs = self.process_finished_jobs(
                        job_pool, write_cursor)
                    n_blames -= processed_jobs

                    if processed_jobs <= self.MAX_BLAMES / 5:
                        profiler_start("Joining unprocessed jobs")
                        job_pool.join()
                        profiler_stop("Joining unprocessed jobs", delete=True)
            except NotValidHunkWarning as e:
                printerr("Not a valid hunk: " + str(e))
            finally:
                file_rev = read_cursor.fetchone()

    job_pool.join()
    self.process_finished_jobs(job_pool, write_cursor, True)

    try:
        self.__drop_cache(cnn)
    except Exception as e:
        printdbg("Couldn't drop cache because of " + str(e))

def get_commit_data(self, patch_content):
    profiler_start("get_commit_data")
    lines = [l + "\n" for l in patch_content.splitlines() if l]
    hunks = []

    for patch in [p for p in parse_patches(lines, allow_dirty=True,
                                           allow_continue=True)
                  if isinstance(p, Patch)]:
        # This method matches that of parseLine in UnifiedDiffParser.java.
        # It's not necessarily intuitive, but this algorithm is much harder
        # than it looks; I spent hours trying to get a simpler solution.
        # It does, however, seem to work, which is pretty amazing when
        # you think about how difficult it is for long enough.
        # The trick of this method is that each *part* of a hunk,
        # i.e. added, deleted, changed, is treated as a *new entity*.
        # The EntityDelta table does not store just diffs, it stores
        # each part of a diff.
        # I will need to copy the behavior of how Sep inserts a CommitData
        # into the database to ensure things match.
        for hunk in patch.hunks:
            old_start_line = hunk.orig_pos - 1
            new_start_line = hunk.mod_pos - 1
            old_end_line = 0
            new_end_line = 0
            added = False
            deleted = False
            in_change = False

            for line in hunk.lines:
                if isinstance(line, RemoveLine):
                    if not in_change or not deleted:
                        in_change = True
                        old_start_line += 1
                        old_end_line = old_start_line
                    else:
                        old_end_line += 1

                    deleted = True
                elif isinstance(line, InsertLine):
                    if not in_change or not added:
                        in_change = True
                        new_start_line += 1
                        new_end_line = new_start_line
                    else:
                        new_end_line += 1

                    added = True
                elif isinstance(line, ContextLine):
                    if in_change:
                        in_change = False

                        printdbg("Patch new name: " + patch.newname)
                        file_name = re.split('\s+', patch.newname)[0]
                        if file_name == "/dev/null":
                            file_name = re.split('\s+', patch.oldname)[0]

                        cd = CommitData(file_name)

                        if deleted:
                            cd.old_start_line = old_start_line
                            cd.old_end_line = old_end_line
                            old_start_line = old_end_line

                        if added:
                            cd.new_start_line = new_start_line
                            cd.new_end_line = new_end_line
                            new_start_line = new_end_line

                        hunks.append(cd)
                        added = deleted = False

                    old_start_line += 1
                    new_start_line += 1

            # The diff ended without a new context line
            if in_change:
                cd = CommitData(re.split('\s+', patch.newname)[0])

                if deleted:
                    cd.old_start_line = old_start_line
                    cd.old_end_line = old_end_line

                if added:
                    cd.new_start_line = new_start_line
                    cd.new_end_line = new_end_line

                hunks.append(cd)

    profiler_stop("get_commit_data")
    return hunks

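# A hand-traced example of the line arithmetic above. The hunk contents are
# made up; RemoveLine/InsertLine/ContextLine are the parsed line objects the
# method already iterates over.
#
#   @@ -10,3 +10,4 @@
#    keep          (context)
#   -gone          (remove)
#   +new one       (insert)
#   +new two       (insert)
#    keep          (context)
#
# Starting from old_start_line = new_start_line = 9:
#   * the leading context line only advances both counters to 10,
#   * the remove line opens a change and fixes the old range at 11..11,
#   * the two insert lines grow the new range to 11..12,
#   * the trailing context line closes the change and emits one CommitData
#     with old_start_line=11, old_end_line=11, new_start_line=11,
#     new_end_line=12, i.e. one entity per changed part of the hunk.
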
        repo_uri = uri

        read_cursor.execute(statement( \
            "SELECT id from repositories where uri = ?", \
            db.place_holder), (repo_uri,))
        repo_id = read_cursor.fetchone()[0]
    except NotImplementedError:
        raise ExtensionRunError( \
            "Content extension is not supported for %s repos" % \
            (repo.get_type()))
    except Exception, e:
        raise ExtensionRunError( \
            "Error creating repository %s. Exception: %s" % \
            (repo.get_uri(), str(e)))

    profiler_start("Hunks: fetch all patches")
    icursor = ICursor(read_cursor, self.INTERVAL_SIZE)
    # Get the patches from this repository
    query = """select p.commit_id, p.patch, s.rev
               from patches p, scmlog s
               where p.commit_id = s.id and
                     s.repository_id = ? and
                     p.patch is not NULL"""
    icursor.execute(statement(query, db.place_holder), (repo_id,))
    profiler_stop("Hunks: fetch all patches", delete=True)

    self.__prepare_table(connection)
    fp = FilePaths(db)

    rs = icursor.fetchmany()

    while rs:

                    except Exception as e:
                        pass
                    finally:
                        inner_cursor.close()

                hunks = [h for h in hunks if h[0] not in blames]
                job = HunkBlameJob(hunks, relative_path, pre_rev)

                job_pool.push(job)
                n_blames += 1

                if n_blames >= self.MAX_BLAMES:
                    processed_jobs = self.process_finished_jobs(
                        job_pool, write_cursor)
                    n_blames -= processed_jobs

                    if processed_jobs <= self.MAX_BLAMES / 5:
                        profiler_start("Joining unprocessed jobs")
                        job_pool.join()
                        profiler_stop("Joining unprocessed jobs", delete=True)
            except NotValidHunkWarning as e:
                printerr("Not a valid hunk: " + str(e))
            finally:
                file_rev = read_cursor.fetchone()

    job_pool.join()
    self.process_finished_jobs(job_pool, write_cursor, True)

    try:
        self.__drop_cache(cnn)
    except Exception as e:
        printdbg("Couldn't drop cache because of " + str(e))

def update_for_revision(self, cursor, commit_id, repo_id):
    db = self.__dict__['db']

    if commit_id == self.__dict__['rev']:
        return

    prev_commit_id = self.__dict__['rev']
    self.__dict__['rev'] = commit_id

    profiler_start("Updating adjacency matrix for commit %d", (commit_id,))
    if self.__dict__['adj'] is None:
        adj = Adj()
        self.__dict__['adj'] = adj
    else:
        adj = self.__dict__['adj']

    rf = self.__dict__['files']
    if rf is not None:
        repo_files_id, repo_files = rf
        if repo_files_id != repo_id:
            del self.__dict__['files']
            repo_files = {}
    else:
        repo_files = {}

    if not repo_files:
        # Get and cache all the files table
        query = "select id, file_name from files where repository_id = ?"
        # profiler_start("Getting files for repository %d", (repo_id,))
        cursor.execute(statement(query, db.place_holder), (repo_id,))
        # profiler_stop("Getting files for repository %d", (repo_id,), True)
        rs = cursor.fetchmany()
        while rs:
            for id, file_name in rs:
                repo_files[id] = file_name
            rs = cursor.fetchmany()
        self.__dict__['files'] = (repo_id, repo_files)

    adj.files = repo_files

    # Get the files that have been renamed
    # with the new name for the given rev
    query = "select af.file_id, af.new_file_name " + \
            "from actions_file_names af, files f " + \
            "where af.file_id = f.id " + \
            "and af.commit_id = ? " + \
            "and af.type = 'V' " + \
            "and f.repository_id = ?"
    # profiler_start("Getting new file names for commit %d", (commit_id,))
    cursor.execute(statement(query, db.place_holder), (commit_id, repo_id))
    # profiler_stop("Getting new file names for commit %d", (commit_id,), True)
    rs = cursor.fetchmany()
    while rs:
        for id, file_name in rs:
            adj.files[id] = file_name
        rs = cursor.fetchmany()

    # Get the new file links since the last time
    query = "select fl.parent_id, fl.file_id " + \
            "from file_links fl, files f " + \
            "where fl.file_id = f.id "
    if prev_commit_id is None:
        query += "and fl.commit_id = ? "
        args = (commit_id, repo_id)
    else:
        query += "and fl.commit_id between ? and ? "
        args = (prev_commit_id, commit_id, repo_id)
    query += "and f.repository_id = ?"
    # profiler_start("Getting file links for commit %d", (commit_id,))
    cursor.execute(statement(query, db.place_holder), args)
    # profiler_stop("Getting file links for commit %d", (commit_id,), True)
    rs = cursor.fetchmany()
    while rs:
        for f1, f2 in rs:
            adj.adj[f2] = f1
        rs = cursor.fetchmany()

    profiler_stop("Updating adjacency matrix for commit %d", (commit_id,),
                  True)

def run(self, repo, uri, db):
    # Record how many patches contain different file names
    function_name_change_count = 0

    # Only suitable for my computer; users should change this
    # according to their own settings
    prefix = r'/home/moqi/Downloads/voldemort'

    # Old file names
    f_of_old = open('/home/moqi/Downloads/voldemort/old', 'w')
    # New file names
    f_of_new = open('/home/moqi/Downloads/voldemort/new', 'w')

    # Stores the information returned by search_lines
    search_result = {}

    # Number of exceptions (e.g. /dev/null, or the file has been deleted
    # so it can not be opened); not accurate
    num_of_exception = 0

    # Number of files which do not belong to source files
    non_source_file = 0

    # Number of patches whose commit_id = 1
    num_of_id1 = 0

    # Number of files that can not be recovered
    num_of_unrecovered = 0

    # old_cla contains the class definitions in the old file
    old_cla = sets.Set()
    new_cla = sets.Set()
    old_func = sets.Set()
    new_func = sets.Set()

    # Max id in table patches
    id_max = 0
    # patch_id
    patch_id = 0
    # file_id
    file_id = 0

    # old_class, new_class, old_function, new_function
    old_class = ''
    new_class = ''
    old_function = ''
    new_function = ''

    __insert__ = """INSERT INTO analyse_patch (patch_id, commit_id, file_id,
                    old_class, new_class, old_function, new_function, if_id1)
                    values (?, ?, ?, ?, ?, ?, ?, ?)"""

    start = time.time()

    profiler_start("Running analyse_patch extension")
    self.db = db
    self.repo = repo

    path = uri_to_filename(uri)
    if path is not None:
        repo_uri = repo.get_uri_for_path(path)
        # added by me
        prefix = path
    else:
        repo_uri = uri

    path = uri_to_filename(uri)
    self.repo_uri = path or repo.get_uri()

    cnn = self.db.connect()
    cursor = cnn.cursor()
    write_cursor = cnn.cursor()
    cursor.execute(statement("SELECT id from repositories where uri = ?",
                             db.place_holder), (repo_uri,))
    repo_id = cursor.fetchone()[0]

    try:
        printdbg("Creating analyse_patch table")
        self.__create_table(cnn)
    except TableAlreadyExists:
        pass
    except Exception, e:
        raise ExtensionRunError(str(e))

def run(self, repo, repo_uri): profiler_start("Processing patch for revision %s", (self.rev)) self.repo = repo self.repo_uri = repo_uri self.get_patch_for_commit() profiler_stop("Processing patch for revision %s", (self.rev))
        repo_uri = uri

        read_cursor.execute(statement( \
            "SELECT id from repositories where uri = ?", \
            db.place_holder), (repo_uri,))
        repo_id = read_cursor.fetchone()[0]
    except NotImplementedError:
        raise ExtensionRunError( \
            "Content extension is not supported for %s repos" % \
            (repo.get_type()))
    except Exception, e:
        raise ExtensionRunError( \
            "Error creating repository %s. Exception: %s" % \
            (repo.get_uri(), str(e)))

    profiler_start("Hunks: fetch all patches")
    icursor = ICursor(read_cursor, self.INTERVAL_SIZE)
    # Get the patches from this repository
    query = """select p.commit_id, p.patch, s.rev
               from patches p, scmlog s
               where p.commit_id = s.id and
                     s.repository_id = ? and
                     p.patch is not NULL"""
    icursor.execute(statement(query, db.place_holder), (repo_id,))
    profiler_stop("Hunks: fetch all patches", delete=True)

    self.__prepare_table(connection)
    fp = FilePaths(db)

    rs = icursor.fetchmany()

    while rs:

i = i + 1 if i >= queuesize: printdbg("Content queue is now at %d, flushing to database", (i,)) processed_jobs = self.__process_finished_jobs(job_pool, write_cursor, db) connection.commit() i = i - processed_jobs if processed_jobs < (queuesize / 5): job_pool.join() job_pool.join() self.__process_finished_jobs(job_pool, write_cursor, db) profiler_start("Inserting results in db") #self.__insert_many(write_cursor) connection.commit() profiler_stop("Inserting results in db") read_cursor.close() write_cursor.close() connection.close() # This turns off the profiler and deletes it's timings profiler_stop("Running content extension", delete=True) def backout(self, repo, uri, db): update_statement = """delete from content where commit_id in (select id from scmlog s where s.repository_id = ?)"""