def get_patch_for_commit(self):
    """Fetch the patch for ``self.rev`` and cache it in ``self.data``.

    A DIFF watcher is registered on ``self.repo`` that appends everything the
    repository emits to an in-memory buffer. ``self.repo.show`` is then run
    and the captured output is stripped and decoded as UTF-8. When the
    command raises ``CommandError`` or ``CommandRunningError`` it is retried
    up to three more times.

    Returns:
        The decoded patch text, or ``None`` when every attempt failed.
    """

    def diff_line(data, io):
        io.write(data)

    io = BytesIO()
    wid = self.repo.add_watch(DIFF, diff_line, io)
    retries = 3

    try:
        while True:
            try:
                self.repo.show(self.repo_uri, self.rev)
                self.data = to_utf8(io.getvalue().strip()).decode("utf-8")
                break
            except (CommandError, CommandRunningError) as e:
                if retries > 0:
                    printerr("Error running show command: %s, trying again",
                             (str(e),))
                    retries -= 1
                    # Rewind AND truncate: seek(0) alone would leave the tail
                    # of a longer failed attempt behind the retry's output.
                    io.seek(0)
                    io.truncate()
                else:
                    printerr("Error running show command: %s, FAILED",
                             (str(e),))
                    self.data = None
                    break
    finally:
        # Always unregister the watcher, even if decoding raised.
        self.repo.remove_watch(DIFF, wid)

    return self.data
def get_line_types(repo, repo_uri, rev, path):
    """Classify each line of ``path`` at revision ``rev``.

    Returns a list with one item per line of code, each item labelled
    'code', 'comment' or 'empty'. Returns None when no file content could
    be obtained.
    """
    # Full location of the file: repository URI joined with the file path.
    full_uri = os.path.join(repo_uri, path)
    file_content = _get_file_content(repo, full_uri, rev)

    # Nothing to classify: report it and stop here.
    if file_content is None or file_content == '':
        printerr("[get_line_types] Error: No file content for " + str(rev) + ":" + str(path) + " found! Skipping.")
        return None

    # Pick a lexer by file name first, then by content, then fall back to
    # plain text.
    try:
        lexer = get_lexer_for_filename(path)
    except ClassNotFound:
        try:
            printdbg("[get_line_types] Guessing lexer for" + str(rev) + ":" + str(path) + ".")
            lexer = guess_lexer(file_content)
        except ClassNotFound:
            printdbg("[get_line_types] No guess or lexer found for " + str(rev) + ":" + str(path) + ". Using TextLexer instead.")
            lexer = TextLexer()

    # The Nemerle lexer is broken and yields an unstoppable process, see
    # https://bitbucket.org/birkenfeld/pygments-main/issue/706/nemerle-lexer-ends-in-an-infinite-loop
    if isinstance(lexer, NemerleLexer):
        lexer = TextLexer()

    # Not sure whether stripping should be skipped for languages that use
    # off-side rules (e.g. python, see
    # http://en.wikipedia.org/wiki/Off-side_rule for a list).
    tokens = lexer.get_tokens(_strip_lines(file_content))
    return _comment_empty_or_code(_iterate_lexer_output(tokens)).split("\n")
def __init__(self, repo, uri):
    """Run ``git log --numstat`` on ``uri`` and feed its output to the parser.

    Sets up the regular expressions and the result dictionaries, locates the
    ``git`` executable and runs the log command, passing each output line to
    ``self.__parse_line``.

    Raises:
        ExtensionRunError: if ``git`` cannot be found in the path or the
            log command fails.
    """
    LineCounter.__init__(self, repo, uri)

    # Raw strings so the regex escapes are not treated as string escapes.
    self.commit_pattern = re.compile(r"^(\w+) ")
    self.file_pattern = re.compile(r"^(\d+)\s+(\d+)\s+([^\s].*)$")

    # Dictionary for storing added, removed pairs, keyed by commit.
    self.lines = {}
    # Dictionary for storing list of paths, keyed by commit.
    self.paths = {}
    # Dictionary for storing added, removed pairs, keyed by commit
    # and path.
    self.lines_files = {}

    # Run git command
    self.git = find_program('git')
    if self.git is None:
        raise ExtensionRunError("Error running CommitsLOCDet extension: " +
                                "required git command cannot be found in path")

    cmd = [self.git, 'log', '--all', '--topo-order', '--numstat',
           '--pretty=oneline']
    c = Command(cmd, uri)
    try:
        c.run(parser_out_func=self.__parse_line)
    except CommandError as e:
        if e.error:
            printerr("Error running git log command: %s", (e.error,))
        # Format the message here; passing str(e) as a second positional
        # argument left the "%s" unexpanded in the exception text.
        raise ExtensionRunError("Error running " +
                                "CommitsLOCDet extension: %s" % str(e))
def run (self, repo, repo_uri): profiler_start("Running BlameJob for %s@%s", (self.path,self.rev)) def blame_line (line, p): p.feed (line) repo_type = repo.get_type () if repo_type == 'cvs': # CVS paths contain the module stuff uri = repo.get_uri_for_path (repo_uri) module = uri[len (repo.get_uri ()):].strip ('/') if module != '.': path = self.path[len (module):].strip ('/') else: path = self.path.strip ('/') else: path = self.path.strip ('/') filename = os.path.basename (self.path) p = create_parser (repo.get_type (), self.path) out = self.get_content_handler() p.set_output_device (out) wid = repo.add_watch (BLAME, blame_line, p) try: repo.blame (os.path.join (repo_uri, path), self.rev) self.collect_results(out) except RepositoryCommandError, e: self.failed = True printerr ("Command %s returned %d (%s)", (e.cmd, e.returncode, e.error))
def listen_for_data(self, repo_func, watcher): def write_line(data, io): io.write(data) io = BytesIO() wid = self.repo.add_watch(watcher, write_line, io) # Git doesn't need retries because all of the revisions # are already on disk if self.repo_type == "git": retries = 0 else: retries = 3 done = False failed = False # Try downloading the file revision while not done and not failed: try: repo_func(os.path.join(self.repo_uri, self.path), self.rev) done = True except RepositoryCommandError, e: if retries > 0: printerr("Command %s returned %d(%s), try again", (e.cmd, e.returncode, e.error)) retries -= 1 io.seek(0) elif retries == 0: failed = True printerr( "Error obtaining %s@%s. " + "Command %s returned %d(%s)", (self.path, self.rev, e.cmd, e.returncode, e.error), ) except:
def _do_backout(self, repo, uri, db, backout_statement):
    """Execute ``backout_statement`` for the repository behind ``uri``.

    The repository id is looked up first; the statement is then executed
    with that id as its only parameter and the change is committed.

    Returns:
        True when the repository is not in the database (nothing is
        executed in that case); otherwise None.

    Raises:
        ExtensionBackoutError: passed to ``execute_statement`` as the
            exception to use when the statement fails.
    """
    connection = db.connect()
    try:
        repo_cursor = connection.cursor()
        repo_uri = get_repo_uri(uri, repo)

        try:
            repo_id = get_repo_id(repo_uri, repo_cursor, db)
        except RepoNotFound:
            # Repository isn't in there, so it's likely already backed out
            printerr("Repository not found, is it in the database?")
            return True
        finally:
            repo_cursor.close()

        update_cursor = connection.cursor()
        try:
            execute_statement(statement(backout_statement, db.place_holder),
                              (repo_id,),
                              update_cursor,
                              db,
                              "Couldn't backout extension",
                              exception=ExtensionBackoutError)
        finally:
            update_cursor.close()

        connection.commit()
    finally:
        # Close the connection on every path, including the early return
        # and a failing statement, which previously leaked it.
        connection.close()
def __prepare_table(self, connection, drop_table=False): cursor = connection.cursor() # Drop the table's old data if drop_table: try: cursor.execute("DROP TABLE hunks") except Exception, e: printerr("Couldn't drop hunks table because %s", (e,))
def __prepare_table(self, connection, drop_table=False): # Drop the table's old data if drop_table: cursor = connection.cursor() try: cursor.execute("DROP TABLE content") except Exception, e: printerr("Couldn't drop content table because %s", (e,)) finally:
def __prepare_table(self, connection, drop_table=False): # Drop the table's old data if drop_table: cursor = connection.cursor() try: cursor.execute("DROP TABLE content") except Exception, e: printerr("Couldn't drop content table because %s", (e, )) finally:
def get_extension(extension_name):
    """Return the registered extension called ``extension_name``.

    When the name is not yet registered, the matching module under
    ``pycvsanaly2.extensions`` is imported first; an ImportError is reported
    but not re-raised. Raises ExtensionUnknownError if the name is still not
    registered afterwards.
    """
    registered = extension_name in _extensions

    if not registered:
        module_name = "pycvsanaly2.extensions.%s" % extension_name
        try:
            __import__(module_name)
        except ImportError as err:
            printerr("Error in importing extension %s: %s",
                     (extension_name, str(err)))
        # Check again now that the import has been attempted.
        registered = extension_name in _extensions

    if not registered:
        raise ExtensionUnknownError('Extension %s not registered' % extension_name)

    return _extensions[extension_name]
def get_patch_for_commit(self, rev): def diff_line(data, io): io.write(data) io = StringIO() wid = self.repo.add_watch(DIFF, diff_line, io) try: self.repo.show(self.repo_uri, rev) data = io.getvalue() except Exception, e: printerr("Error running show command: %s", [str(e)]) data = None
def get_patch_for_commit (self, rev): def diff_line (data, io): io.write (data) io = StringIO () wid = self.repo.add_watch (DIFF, diff_line, io) try: self.repo.show (self.repo_uri, rev) data = io.getvalue () except Exception, e: printerr ("Error running show command: %s", (str (e))) data = None
def iter_file_patch(iter_lines, allow_dirty=False): ''' :arg iter_lines: iterable of lines to parse for patches :kwarg allow_dirty: If True, allow comments and other non-patch text before the first patch. Note that the algorithm here can only find such text before any patches have been found. Comments after the first patch are stripped away in iter_hunks() if it is also passed allow_dirty=True. Default False. ''' ### FIXME: Docstring is not quite true. We allow certain comments no # matter what, If they startwith '===', '***', or '#' Someone should # reexamine this logic and decide if we should include those in # allow_dirty or restrict those to only being before the patch is found # (as allow_dirty does). regex = re.compile(binary_files_re) saved_lines = [] orig_range = 0 beginning = True for line in iter_lines: if line.startswith('=== ') or line.startswith('*** '): continue if line.startswith('#'): continue elif orig_range > 0: if line.startswith('-') or line.startswith(' '): orig_range -= 1 elif line.startswith('--- ') or regex.match(line): if allow_dirty and beginning: # Patches can have "junk" at the beginning # Stripping junk from the end of patches is handled when we # parse the patch beginning = False elif len(saved_lines) > 0: yield saved_lines saved_lines = [] elif line.startswith('@@'): try: hunk = hunk_from_header(line) except MalformedHunkHeader, e: if allow_dirty: printerr( "\nError: MalformedHunkHeader; Probably merge commit. Skipping." ) continue raise e orig_range = hunk.orig_range saved_lines.append(line)
def iter_file_patch(iter_lines, allow_dirty=False): ''' :arg iter_lines: iterable of lines to parse for patches :kwarg allow_dirty: If True, allow comments and other non-patch text before the first patch. Note that the algorithm here can only find such text before any patches have been found. Comments after the first patch are stripped away in iter_hunks() if it is also passed allow_dirty=True. Default False. ''' ### FIXME: Docstring is not quite true. We allow certain comments no # matter what, If they startwith '===', '***', or '#' Someone should # reexamine this logic and decide if we should include those in # allow_dirty or restrict those to only being before the patch is found # (as allow_dirty does). regex = re.compile(binary_files_re) saved_lines = [] orig_range = 0 beginning = True for line in iter_lines: if line.startswith('=== ') or line.startswith('*** '): continue if line.startswith('#'): continue elif orig_range > 0: if line.startswith('-') or line.startswith(' '): orig_range -= 1 elif line.startswith('--- ') or regex.match(line): if allow_dirty and beginning: # Patches can have "junk" at the beginning # Stripping junk from the end of patches is handled when we # parse the patch beginning = False elif len(saved_lines) > 0: yield saved_lines saved_lines = [] elif line.startswith('@@'): try: hunk = hunk_from_header(line) except MalformedHunkHeader, e: if allow_dirty: printerr("\nError: MalformedHunkHeader; Probably merge commit. Skipping.") continue raise e orig_range = hunk.orig_range saved_lines.append(line)
def _get_file_content(repo, uri, rev):
    """Read the content of a file at a revision from the given repository.

    Returns the decoded content with line breaks converted, or None when
    anything goes wrong while fetching or decoding it.
    """
    buf = BytesIO()

    def collect(chunk, sink):
        # Watch callback: append whatever the repository emits.
        sink.write(chunk)

    watch_id = repo.add_watch(CAT, collect, buf)

    try:
        repo.cat(uri, rev)
        decoded = to_utf8(buf.getvalue()).decode("utf-8")
        # Make sure we always have the same kind of new lines.
        content = _convert_linebreaks(decoded)
    except Exception as err:
        printerr("[get_line_types] Error running show command: %s, FAILED", (str(err),))
        content = None

    repo.remove_watch(CAT, watch_id)
    return content
def __init__(self, repo, uri):
    """Run ``git log --shortstat`` on ``uri`` and feed its output to the parser.

    Locates the ``git`` executable, initialises ``self.lines`` and runs the
    log command, passing each output line to ``self.__parse_line``.

    Raises:
        ExtensionRunError: if ``git`` cannot be found in the path or the
            log command fails.
    """
    LineCounter.__init__(self, repo, uri)

    self.git = find_program('git')
    if self.git is None:
        raise ExtensionRunError("Error running CommitsLOC extension: " +
                                "required git command cannot be found in path")

    self.lines = {}

    cmd = [self.git, 'log', '--all', '--topo-order', '--shortstat',
           '--pretty=oneline', 'origin']
    c = Command(cmd, uri)
    try:
        c.run(parser_out_func=self.__parse_line)
    except CommandError as e:
        if e.error:
            printerr("Error running git log command: %s", (e.error,))
        # Format the message here; passing str(e) as a second positional
        # argument left the "%s" unexpanded in the exception text.
        raise ExtensionRunError("Error running CommitsLOC extension: %s" % str(e))
def iter_hunks(iter_lines, allow_dirty=False): ''' :arg iter_lines: iterable of lines to parse for hunks :kwarg allow_dirty: If True, when we encounter something that is not a hunk header when we're looking for one, assume the rest of the lines are not part of the patch (comments or other junk). Default False ''' hunk = None for line in iter_lines: if line == "\n": if hunk is not None: yield hunk hunk = None continue if hunk is not None: yield hunk try: hunk = hunk_from_header(line) except MalformedHunkHeader: if allow_dirty: # If the line isn't a hunk header, then we've reached the end # of this patch and there's "junk" at the end. Ignore the # rest of this patch. return raise orig_size = 0 mod_size = 0 while orig_size < hunk.orig_range or mod_size < hunk.mod_range: try: hunk_line = parse_line(iter_lines.next()) hunk.lines.append(hunk_line) if isinstance(hunk_line, (RemoveLine, ContextLine)): orig_size += 1 if isinstance(hunk_line, (InsertLine, ContextLine)): mod_size += 1 except StopIteration: break except MalformedLine, e: if allow_dirty: printerr( "\nError: MalformedLine; Probably binary file. Skipping line." ) continue raise e
def __process_finished_jobs(self, job_pool, connection, db):
    """Insert the results of all finished jobs into the content table.

    Jobs are taken from ``job_pool.get_next_done(0)`` until it returns None.
    For each job one row (commit_id, file_id, content, loc, size) is
    inserted. On MySQL a duplicate-entry IntegrityError (code 1062) is
    ignored; any other IntegrityError is reported and re-raised. The
    connection is committed once all jobs were processed.

    Returns:
        The number of jobs taken from the pool (ignored duplicates are
        counted as well).
    """
    if isinstance(self.db, SqliteDatabase):
        from sqlite3 import IntegrityError
    elif isinstance(self.db, MysqlDatabase):
        from MySQLdb import IntegrityError
    else:
        # Unknown backend: catch nothing instead of hitting a NameError in
        # the except clause, which would mask the real database error.
        IntegrityError = ()

    # commit_id is the commit ID. For some reason, the
    # documentation advocates tablename_id as the reference,
    # but in the source, these are referred to as commit IDs.
    query = """
        insert into content(commit_id, file_id, content, loc, size)
            values(?,?,?,?,?)"""
    # The statement does not depend on the job, so build it once.
    insert_statement = statement(query, db.place_holder)

    write_cursor = connection.cursor()
    processed_jobs = 0
    try:
        finished_job = job_pool.get_next_done(0)
        while finished_job is not None:
            parameters = (
                finished_job.commit_id,
                finished_job.file_id,
                finished_job.file_contents,
                finished_job.file_number_of_lines,
                finished_job.file_size,
            )
            try:
                write_cursor.execute(insert_statement, parameters)
            except IntegrityError as e:
                is_duplicate = (isinstance(self.db, MysqlDatabase)
                                and e.args[0] == 1062)
                if not is_duplicate:
                    printerr(
                        "Error while inserting content for file %d @ commit %d"
                        % (finished_job.file_id, finished_job.commit_id)
                    )
                    raise

            processed_jobs += 1
            finished_job = job_pool.get_next_done(0)

        connection.commit()
    finally:
        # Release the cursor even when an insert error is re-raised.
        write_cursor.close()

    return processed_jobs
def _get_file_content(repo, uri, rev):
    """Fetch a file at a given revision from the repository as text.

    Returns the UTF-8 decoded content after line-break conversion, or None
    if fetching or decoding raised an exception.
    """
    def append_output(data, target):
        target.write(data)

    captured = BytesIO()
    watch = repo.add_watch(CAT, append_output, captured)

    result = None
    try:
        repo.cat(uri, rev)
        text = to_utf8(captured.getvalue()).decode("utf-8")
    except Exception as exc:
        printerr("[get_line_types] Error running show command: %s, FAILED",
                 (str(exc), ))
    else:
        try:
            # Make sure we always have the same kind of new lines.
            result = _convert_linebreaks(text)
        except Exception as exc:
            printerr("[get_line_types] Error running show command: %s, FAILED",
                     (str(exc), ))
            result = None

    repo.remove_watch(CAT, watch)
    return result
def iter_hunks(iter_lines, allow_dirty=False): ''' :arg iter_lines: iterable of lines to parse for hunks :kwarg allow_dirty: If True, when we encounter something that is not a hunk header when we're looking for one, assume the rest of the lines are not part of the patch (comments or other junk). Default False ''' hunk = None for line in iter_lines: if line == "\n": if hunk is not None: yield hunk hunk = None continue if hunk is not None: yield hunk try: hunk = hunk_from_header(line) except MalformedHunkHeader: if allow_dirty: # If the line isn't a hunk header, then we've reached the end # of this patch and there's "junk" at the end. Ignore the # rest of this patch. return raise orig_size = 0 mod_size = 0 while orig_size < hunk.orig_range or mod_size < hunk.mod_range: try: hunk_line = parse_line(iter_lines.next()) hunk.lines.append(hunk_line) if isinstance(hunk_line, (RemoveLine, ContextLine)): orig_size += 1 if isinstance(hunk_line, (InsertLine, ContextLine)): mod_size += 1 except StopIteration: break except MalformedLine, e: if allow_dirty: printerr("\nError: MalformedLine; Probably binary file. Skipping line.") continue raise e
def run(self, repo, repo_uri): profiler_start("Running HunkBlameJob for %s@%s", (self.path, self.rev)) def blame_line(line, p): p.feed(line) start = sys.maxint end = 0 for hunk in self.hunks: if hunk[1] < start: start = hunk[1] if hunk[2] > end: end = hunk[2] repo_type = repo.get_type() if repo_type == 'cvs': # CVS paths contain the module stuff uri = repo.get_uri_for_path(repo_uri) module = uri[len(repo.get_uri()):].strip('/') if module != '.': path = self.path[len(module):].strip('/') else: path = self.path.strip('/') else: path = self.path.strip('/') p = create_parser(repo.get_type(), self.path) out = self.get_content_handler() p.set_output_device(out) wid = repo.add_watch(BLAME, blame_line, p) try: repo.blame(os.path.join(repo_uri, path), self.rev, start=start, end=end) self.collect_results(out) except RepositoryCommandError, e: self.failed = True printerr("Command %s returned %d (%s)", (e.cmd, e.returncode, e.error))
def __process_finished_jobs(self, job_pool, connection, db):
    """Write the results of every finished job to the content table.

    Finished jobs are pulled with ``job_pool.get_next_done(0)`` until None
    is returned. Each job contributes one row (commit_id, file_id, content,
    loc, size). A MySQL duplicate-entry IntegrityError (code 1062) is
    ignored; every other IntegrityError is reported and re-raised. The
    connection is committed after the last job.

    Returns:
        The number of jobs taken from the pool, including ignored
        duplicates.
    """
    if isinstance(self.db, SqliteDatabase):
        from sqlite3 import IntegrityError
    elif isinstance(self.db, MysqlDatabase):
        from MySQLdb import IntegrityError
    else:
        # Unknown backend: an empty tuple catches nothing, so the real
        # database error propagates instead of a NameError.
        IntegrityError = ()

    # commit_id is the commit ID. For some reason, the
    # documentation advocates tablename_id as the reference,
    # but in the source, these are referred to as commit IDs.
    query = """
        insert into content(commit_id, file_id, content, loc, size)
            values(?,?,?,?,?)"""
    # Loop-invariant: build the statement once rather than per job.
    insert_statement = statement(query, db.place_holder)

    write_cursor = connection.cursor()
    processed_jobs = 0
    try:
        finished_job = job_pool.get_next_done(0)
        while finished_job is not None:
            parameters = (finished_job.commit_id,
                          finished_job.file_id,
                          finished_job.file_contents,
                          finished_job.file_number_of_lines,
                          finished_job.file_size)
            try:
                write_cursor.execute(insert_statement, parameters)
            except IntegrityError as e:
                is_duplicate = (isinstance(self.db, MysqlDatabase)
                                and e.args[0] == 1062)
                if not is_duplicate:
                    printerr(
                        'Error while inserting content for file %d @ commit %d'
                        % (finished_job.file_id, finished_job.commit_id))
                    raise

            processed_jobs += 1
            finished_job = job_pool.get_next_done(0)

        connection.commit()
    finally:
        # Close the cursor on the error path too.
        write_cursor.close()

    return processed_jobs
def get_lines_for_revision(self, revision): revision = int(revision) def diff_line(data, diff_data_l): diff_data = diff_data_l[0] diff_data += data diff_data_l[0] = diff_data revs = [] revs.append("%d" % (revision - 1)) revs.append("%d" % (revision)) env = os.environ.copy().update({'LC_ALL': 'C'}) pipe = Popen(self.diffstat, shell=False, stdin=PIPE, stdout=PIPE, close_fds=True, env=env) diff_data = [""] wid = self.repo.add_watch(DIFF, diff_line, diff_data) try: self.repo.diff(self.repo.get_uri(), revs=revs, no_whitespace=True) except Exception, e: printerr("Error running svn diff command: %s", [str(e)]) self.repo.remove_watch(DIFF, wid) return (0, 0)
def run(self, repo, repo_uri): def write_line(data, io): io.write(data) self.repo = repo self.repo_uri = repo_uri self.repo_type = self.repo.get_type() io = BytesIO() wid = repo.add_watch(LS, write_line, io) # Git doesn't need retries because all of the revisions # are already on disk if self.repo_type == 'git': retries = 0 else: retries = 3 done = False failed = False # Try downloading the file listing while not done and not failed: try: self.repo.ls(self.repo_uri, self.rev) done = True except RepositoryCommandError, e: if retries > 0: printerr("Command %s returned %d(%s), try again",\ (e.cmd, e.returncode, e.error)) retries -= 1 io.seek(0) elif retries == 0: failed = True printerr("Error obtaining %s@%s. " + "Command %s returned %d(%s)", \ (self.path, self.rev, e.cmd, \ e.returncode, e.error)) except Exception, e: failed = True printerr("Error obtaining %s@%s. Exception: %s", \ (self.path, self.rev, str(e)))
def main(argv): # Short (one letter) options. Those requiring argument followed by : short_opts = "hu:p:d:H:w:" # Long options (all started by --). Those requiring argument followed by = long_opts = [ "help", "db-user="******"db-password="******"db-hostname=", "db-database=", "db-driver=", "window=" ] # Default options user = "******" passwd = None hostname = "localhost" database = "cvsanaly" driver = "mysql" slidingTimeWindow = 120 projectName = None repositoryUri = None fetchPath = os.getenv("FETCH") if fetchPath is None: printerr("FETCH environment variable not set") printerr("Use " + sys.argv[0] + " --help for details") return 1 snavPath = os.getenv("SN_HOME") if snavPath is None: printerr("SN_HOME environment variable not set") printerr("Use " + sys.argv[0] + " --help for details") return 1 pmcPath = os.getenv("PMC") if pmcPath is None: printerr("PMC environment variable not set") printerr("Use " + sys.argv[0] + " --help for details") return 1 ldiffPath = os.getenv("LDIFF") if ldiffPath is None: printerr("LDIFF environment variable not set") printerr("Use " + sys.argv[0] + " --help for details") return 1 try: opts, args = getopt.getopt(argv[1:], short_opts, long_opts) except getopt.GetoptError, e: printerr(str(e)) return 1
io.seek(0) elif retries == 0: failed = True printerr("Error obtaining %s@%s. " + "Command %s returned %d(%s)", \ (self.path, self.rev, e.cmd, \ e.returncode, e.error)) except Exception, e: failed = True printerr("Error obtaining %s@%s. Exception: %s", \ (self.path, self.rev, str(e))) self.repo.remove_watch(LS, wid) if failed: printerr("Failure due to error") else: try: self.ls_lines = io.getvalue().splitlines() if Config().count_types: self.ls_lines = [ fp for fp in self.ls_lines if guess_file_type(fp) in Config().count_types ] except Exception, e: printerr("Error getting ls-lines." + "Exception: %s", (str(e), )) finally: io.close()
def run(self, repo, uri, db):
    """Fill in the commits_lines table.

    Create a counter to find number of lines added and removed
    for each commit in repo, create an object to manage the
    commits_lines table, for each commit in repo, create an entry
    in commit_lines table (except for those that already were in
    the table).

    Per-file rows are queued for the commits_files_lines table in the same
    pass, and the per-commit totals are compared against the sum of the
    per-file values as a sanity check.
    """
    cnn = db.connect()
    # Cursor for reading from the database
    cursor = cnn.cursor()
    # Cursor for writing to the database
    write_cursor = cnn.cursor()
    repo_id = self._get_repo_id(repo, uri, cursor)
    # Counter to find lines added, removed for each commit
    counter = create_line_counter_for_repository(repo, uri)
    # Object to manage the commits_lines table
    theTableComLines = TableComLines(db, cnn, repo_id)
    # Object to manage the commits_files_lines table
    theTableComFilLines = TableComFilLines(db, cnn, repo_id)
    cursor.execute("SELECT id, rev, composed_rev " + "FROM scmlog WHERE repository_id = '%s'", (repo_id,))
    rows_left = True
    # Read the log in batches until fetchmany() returns nothing.
    while rows_left:
        rows = cursor.fetchmany()
        for id, revision, composed_rev in rows:
            # For a composed revision only the part before "|" is used
            # as the commit identifier.
            if composed_rev:
                commit = revision.split("|")[0]
            else:
                commit = revision
            cadded = cremoved = 0
            # Queue a commit-level row only if it is not in the table yet.
            if not theTableComLines.in_table(id):
                (cadded, cremoved) = counter.get_lines_for_commit(commit)
                theTableComLines.add_pending_row((None, id, cadded, cremoved))
            # Totals accumulated over the per-file rows queued below.
            tadded = tremoved = 0
            for path in counter.get_paths_for_commit(commit):
                # Per-file rows are keyed by "<id>,<path>".
                if not theTableComFilLines.in_table(str(id) + ',' + path):
                    (added, removed) = \
                        counter.get_lines_for_commit_file(commit, path)
                    theTableComFilLines.add_pending_row((None, id, path, added, removed))
                    tadded += int(added)
                    tremoved += int(removed)
            # Sanity check
            if (cadded != tadded) or (cremoved != tremoved):
                printerr("Sanity check failed: %d, %s, %d, %d, %d, %d" % (id, commit, cadded, tadded, cremoved, tremoved))
                printerr(counter.get_paths_for_commit(commit))
        # Flush the rows queued for this batch.
        theTableComLines.insert_rows(write_cursor)
        theTableComFilLines.insert_rows(write_cursor)
        if not rows:
            rows_left = False
    cnn.commit()
    write_cursor.close()
    cursor.close()
    cnn.close()
for hunk in self.get_commit_data(patch_content): # Get the file ID from the database for linking hunk_file_name = re.sub(r'^[ab]\/', '', hunk.file_name.strip()) file_id = fp.get_file_id(hunk_file_name, commit_id) if file_id == None: printdbg("file not found") if repo.type == "git": # The liklihood is that this is a merge, not a # missing ID from some data screwup. # We'll just continue and throw this away continue else: printerr("No file ID found for hunk " + \ hunk_file_name + \ " at commit " + str(commit_id)) insert = """insert into hunks(file_id, commit_id, old_start_line, old_end_line, new_start_line, new_end_line) values(?,?,?,?,?,?)""" execute_statement(statement(insert, db.place_holder), (file_id, commit_id, hunk.old_start_line, hunk.old_end_line, hunk.new_start_line, hunk.new_end_line), write_cursor, db,
for hunk in self.get_commit_data(patch_content): # Get the file ID from the database for linking hunk_file_name = re.sub(r'^[ab]\/', '', hunk.file_name.strip()) file_id = fp.get_file_id(hunk_file_name, commit_id) if file_id == None: printdbg("file not found") if repo.type == "git": # The liklihood is that this is a merge, not a # missing ID from some data screwup. # We'll just continue and throw this away continue else: printerr("No file ID found for hunk " + \ hunk_file_name + \ " at commit " + commit_id) insert = """insert into hunks(file_id, commit_id, old_start_line, old_end_line, new_start_line, new_end_line) values(?,?,?,?,?,?)""" execute_statement(statement(insert, db.place_holder), (file_id, commit_id, hunk.old_start_line, hunk.old_end_line, hunk.new_start_line, hunk.new_end_line), write_cursor, db, "Couldn't insert hunk, dup record?", exception=ExtensionRunError)
# Configuration & initialisation of output cdifWriter.writeMooseCompliantCdif() outputHandle = open(outputFile, "w") outputHandle.close() outputHandle = open(outputFile, "a+") EvoCdifWriter.setOutputHandle(outputHandle) cdifWriter.set_outputhandle(outputHandle) cdifWriter.initializeIDCounter() cdifWriter.generateHeader(True, "evofetch", projectName, "4", "C++", "ISO98") try: db = create_database(driver, database, user, passwd, hostname) except AccessDenied, e: printerr("Error creating database: %s", (e.message)) return 1 except DatabaseNotFound: printerr( "Database %s doesn't exist. It must be created before running cvsanaly", (database)) return 1 except DatabaseDriverNotSupported: printerr("Database driver %s is not supported by cvsanaly", (driver)) return 1 cnn = db.connect() try: repositoryID = findRepository(db, cnn, projectName) except ProjectNotFound:
retries -= 1 io.seek(0) elif retries == 0: failed = True printerr("Error obtaining %s@%s. " + "Command %s returned %d(%s)", \ (self.path, self.rev, e.cmd, \ e.returncode, e.error)) except Exception, e: failed = True printerr("Error obtaining %s@%s. Exception: %s", \ (self.path, self.rev, str(e))) self.repo.remove_watch(watcher, wid) if failed: printerr("Failure due to error") else: try: results = io.getvalue() return results except Exception, e: printerr("Error getting contents." + "Exception: %s", (str(e), )) finally: io.close() def _get_file_contents(self): """Returns contents of the file, stripped of whitespace at either end """ # An encode will fail if the source code can't be converted to
break break if possible_file[1] == hunk_file_name: file_id = possible_file[0] break break if file_id == None: if repo.type == "git": # The liklihood is that this is a merge, not a # missing ID from some data screwup. # We'll just continue and throw this away continue else: printerr("No file ID found for hunk " + hunk_file_name) insert = """insert into hunks(file_id, commit_id, old_start_line, old_end_line, new_start_line, new_end_line) values(?,?,?,?,?,?)""" execute_statement(statement(insert, db.place_holder), (file_id, commit_id, hunk.old_start_line, \ hunk.old_end_line, hunk.new_start_line, \ hunk.new_end_line), write_cursor, db, "Couldn't insert hunk, duplicate record?", exception=ExtensionRunError)
io.seek(0) elif retries == 0: failed = True printerr("Error obtaining %s@%s. " + "Command %s returned %d(%s)", \ (self.path, self.rev, e.cmd, \ e.returncode, e.error)) except Exception, e: failed = True printerr("Error obtaining %s@%s. Exception: %s", \ (self.path, self.rev, str(e))) self.repo.remove_watch(LS, wid) if failed: printerr("Failure due to error") else: try: self.ls_lines = io.getvalue().splitlines() if Config().count_types: self.ls_lines = [fp for fp in self.ls_lines if guess_file_type(fp) in Config().count_types] except Exception, e: printerr("Error getting ls-lines." + "Exception: %s", (str(e),)) finally: io.close() def _get_ls_line_count(self):
retries -= 1 io.seek(0) elif retries == 0: failed = True printerr("Error obtaining %s@%s. " + "Command %s returned %d(%s)", \ (self.path, self.rev, e.cmd, \ e.returncode, e.error)) except Exception, e: failed = True printerr("Error obtaining %s@%s. Exception: %s", \ (self.path, self.rev, str(e))) self.repo.remove_watch(watcher, wid) if failed: printerr("Failure due to error") else: try: results = io.getvalue() return results except Exception, e: printerr("Error getting contents." + "Exception: %s", (str(e),)) finally: io.close() def _get_file_contents(self): """Returns contents of the file, stripped of whitespace at either end """ # An encode will fail if the source code can't be converted to
done = True except RepositoryCommandError, e: if retries > 0: printerr("Command %s returned %d(%s), try again", (e.cmd, e.returncode, e.error)) retries -= 1 io.seek(0) elif retries == 0: failed = True printerr("Error obtaining %s@%s. " + "Command %s returned %d(%s)", (self.path, self.rev, e.cmd, e.returncode, e.error)) except: failed = True printerr("Error obtaining %s@%s.", (self.path, self.rev)) traceback.print_exc() self.repo.remove_watch(watcher, wid) results = None if not failed: try: results = io.getvalue() except Exception, e: printerr("Error getting contents." + "Exception: %s", (str(e),)) finally: io.close() return results
hunks = [h for h in hunks if h[0] not in blames] job = HunkBlameJob(hunks, relative_path, pre_rev) job_pool.push (job) n_blames += 1 if n_blames >= self.MAX_BLAMES: processed_jobs = self.process_finished_jobs (job_pool, write_cursor) n_blames -= processed_jobs if processed_jobs<=self.MAX_BLAMES/5: profiler_start("Joining unprocessed jobs") job_pool.join() profiler_stop("Joining unprocessed jobs", delete=True) except NotValidHunkWarning as e: printerr("Not a valid hunk: "+str(e)) finally: file_rev = read_cursor.fetchone() job_pool.join () self.process_finished_jobs (job_pool, write_cursor, True) try: self.__drop_cache(cnn) except: printdbg("Couldn't drop cache because of " + str(e)) read_cursor.close () write_cursor.close () cnn.close()
hunks = [h for h in hunks if h[0] not in blames] job = HunkBlameJob(hunks, relative_path, pre_rev) job_pool.push(job) n_blames += 1 if n_blames >= self.MAX_BLAMES: processed_jobs = self.process_finished_jobs( job_pool, write_cursor) n_blames -= processed_jobs if processed_jobs <= self.MAX_BLAMES / 5: profiler_start("Joining unprocessed jobs") job_pool.join() profiler_stop("Joining unprocessed jobs", delete=True) except NotValidHunkWarning as e: printerr("Not a valid hunk: " + str(e)) finally: file_rev = read_cursor.fetchone() job_pool.join() self.process_finished_jobs(job_pool, write_cursor, True) try: self.__drop_cache(cnn) except: printdbg("Couldn't drop cache because of " + str(e)) read_cursor.close() write_cursor.close() cnn.close()
def run(self, repo, uri, db):
    """Fill in the commits_lines table.

    Create a counter to find number of lines added and removed
    for each commit in repo, create an object to manage the
    commits_lines table, for each commit in repo, create an entry
    in commit_lines table (except for those that already were in
    the table).

    Per-file rows are queued for the commits_files_lines table in the same
    pass, and the per-commit totals are compared against the sum of the
    per-file values as a sanity check.
    """
    cnn = db.connect()
    try:
        # Cursor for reading from the database
        cursor = cnn.cursor()
        # Cursor for writing to the database
        write_cursor = cnn.cursor()
        try:
            repo_id = self._get_repo_id(repo, uri, cursor)
            # Counter to find lines added, removed for each commit
            counter = create_line_counter_for_repository(repo, uri)
            # Object to manage the commits_lines table
            theTableComLines = TableComLines(db, cnn, repo_id)
            # Object to manage the commits_files_lines table
            theTableComFilLines = TableComFilLines(db, cnn, repo_id)

            # DB-API expects the parameters as a sequence, so wrap the
            # single value in a tuple instead of passing it bare.
            cursor.execute(
                "SELECT id, rev, composed_rev " +
                "FROM scmlog WHERE repository_id = '%s'", (repo_id,))

            # Read the log in batches until fetchmany() returns nothing.
            rows_left = True
            while rows_left:
                rows = cursor.fetchmany()
                # commit_id rather than id, to avoid shadowing the builtin.
                for commit_id, revision, composed_rev in rows:
                    # For a composed revision only the part before "|" is
                    # used as the commit identifier.
                    if composed_rev:
                        commit = revision.split("|")[0]
                    else:
                        commit = revision

                    cadded = cremoved = 0
                    if not theTableComLines.in_table(commit_id):
                        (cadded, cremoved) = counter.get_lines_for_commit(commit)
                        theTableComLines.add_pending_row(
                            (None, commit_id, cadded, cremoved))

                    # Totals accumulated over the per-file rows queued below.
                    tadded = tremoved = 0
                    for path in counter.get_paths_for_commit(commit):
                        # Per-file rows are keyed by "<id>,<path>".
                        if not theTableComFilLines.in_table(
                                str(commit_id) + ',' + path):
                            (added, removed) = \
                                counter.get_lines_for_commit_file(commit, path)
                            theTableComFilLines.add_pending_row(
                                (None, commit_id, path, added, removed))
                            tadded += int(added)
                            tremoved += int(removed)

                    # Sanity check
                    if (cadded != tadded) or (cremoved != tremoved):
                        printerr("Sanity check failed: %d, %s, %d, %d, %d, %d" %
                                 (commit_id, commit, cadded, tadded,
                                  cremoved, tremoved))
                        printerr(counter.get_paths_for_commit(commit))

                # Flush the rows queued for this batch.
                theTableComLines.insert_rows(write_cursor)
                theTableComFilLines.insert_rows(write_cursor)
                if not rows:
                    rows_left = False

            cnn.commit()
        finally:
            # Release both cursors even when a query or insert fails.
            write_cursor.close()
            cursor.close()
    finally:
        cnn.close()
def run(self, repo, repo_uri): def write_line (data, io): io.write (data) # start = datetime.now() self.repo = repo self.repo_uri = repo_uri self.repo_type = self.repo.get_type() if self.repo_type == 'cvs': # CVS self.paths contain the module stuff uri = self.repo.get_uri_for_self.path(self.repo_uri) module = uri[len(self.repo.get_uri()):].strip('/') if module != '.': self.path = self.path[len(module):].strip('/') else: self.path = self.path.strip('/') else: self.path = self.path.strip('/') suffix = '' filename = os.path.basename(self.path) ext_ptr = filename.rfind('.') if ext_ptr != -1: suffix = filename[ext_ptr:] io = BytesIO() wid = self.repo.add_watch(CAT, write_line, io) # Git doesn't need retries because all of the revisions # are already on disk if self.repo_type == 'git': retries = 0 else: retries = 3 done = False failed = False # print "Before downloadning file revision: %s"%(datetime.now()-start) # Try downloading the file revision while not done and not failed: try: self.repo.cat(os.path.join(self.repo_uri, self.path), self.rev) # print "After cat: %s"%(datetime.now()-start) done = True except RepositoryCommandError, e: if retries > 0: printerr("Command %s returned %d(%s), try again",\ (e.cmd, e.returncode, e.error)) retries -= 1 io.seek(0) elif retries == 0: failed = True printerr("Error obtaining %s@%s. " + "Command %s returned %d(%s)", \ (self.path, self.rev, e.cmd, \ e.returncode, e.error)) except Exception, e: failed = True printerr("Error obtaining %s@%s. Exception: %s", \ (self.path, self.rev, str(e)))
repo_func(os.path.join(self.repo_uri, self.path), self.rev) done = True except RepositoryCommandError, e: if retries > 0: printerr("Command %s returned %d(%s), try again", (e.cmd, e.returncode, e.error)) retries -= 1 io.seek(0) elif retries == 0: failed = True printerr( "Error obtaining %s@%s. " + "Command %s returned %d(%s)", (self.path, self.rev, e.cmd, e.returncode, e.error), ) except: failed = True printerr("Error obtaining %s@%s.", (self.path, self.rev)) traceback.print_exc() self.repo.remove_watch(watcher, wid) results = None if not failed: try: results = io.getvalue() except Exception, e: printerr("Error getting contents." + "Exception: %s", (str(e),)) finally: io.close() return results def _get_file_contents(self):