def __load_caches_from_disk (self):
    """Restore the handler's lookup caches from the pickle file at self.cache_file.

    Loads the 7-tuple of caches (file, moves, deletes, revision, branch,
    tags, people) previously written by __save_caches_to_disk.
    """
    printdbg ("DBContentHandler: Loading caches from disk (%s)", (self.cache_file,))
    # 'rb': the caches are dumped with pickle protocol -1 (a binary
    # protocol, see __save_caches_to_disk), so the file must be read in
    # binary mode to be portable.  'with' guarantees the descriptor is
    # closed even if unpickling fails.
    with open (self.cache_file, 'rb') as f:
        (self.file_cache, self.moves_cache, self.deletes_cache,
         self.revision_cache, self.branch_cache, self.tags_cache,
         self.people_cache) = load (f)
def _get_uri_and_repo(path):
    """ Get a URI and repositoryhandler object for a path.

    This function returns a URI as a string, and the repositoryhandler
    object that represents that URI. They are returned together as a
    tuple.

    Args:
        path: The path to the repository
    """
    # Create repository
    if path is not None:
        try:
            printdbg("Creating repositoryhandler instance")
            repo = create_repository_from_path(path)
            # Generous timeout: repository operations can be slow over
            # the network.
            repo.timeout = 120
        except RepositoryUnknownError:
            printerr("Path %s doesn't seem to point to a repository " + \
                     "supported by cvsanaly", (path,))
            sys.exit(1)
        except Exception as e:
            # 'as e' form instead of the deprecated 'Exception, e':
            # consistent with create_tables elsewhere in this file.
            printerr("Unknown error creating repository for path %s (%s)",
                     (path, str(e)))
            sys.exit(1)
        uri = repo.get_uri_for_path(path)
        return (uri, repo)
def __load_caches_from_disk(self):
    """Read the pickled cache tuple back from self.cache_file.

    Restores file_cache, moves_cache, deletes_cache, revision_cache,
    branch_cache, tags_cache and people_cache in one shot.
    """
    printdbg("DBContentHandler: Loading caches from disk (%s)",
             (self.cache_file,))
    # Binary mode is required: the writer dumps with pickle protocol -1
    # (binary).  The context manager closes the file even on error.
    with open(self.cache_file, 'rb') as f:
        (self.file_cache, self.moves_cache, self.deletes_cache,
         self.revision_cache, self.branch_cache, self.tags_cache,
         self.people_cache) = load(f)
def ensure_person(person):
    # Return the id of `person` in the people table, inserting a new
    # DBPerson row when no row with that name exists yet.
    # NOTE(review): closure — reads self.cursor / self.db / self.repo_id
    # from the enclosing scope.
    profiler_start("Ensuring person %s for repository %d",
                   (person.name, self.repo_id))
    printdbg("DBContentHandler: ensure_person %s <%s>",
             (person.name, person.email))
    cursor = self.cursor

    name = to_utf8(person.name)
    email = person.email
    if email is not None:
        # Store the email as unicode (utf-8 bytes decoded back)
        email = to_utf8(email).decode("utf-8")
    # NOTE(review): `name` is already the result of to_utf8 here, so
    # to_utf8 is applied twice below — presumably idempotent; verify.
    cursor.execute(statement(
        "SELECT id from people where name = ?",
        self.db.place_holder), (to_utf8(name).decode("utf-8"),))
    rs = cursor.fetchone()
    if not rs:
        # Unknown person: insert a fresh row and use its generated id.
        p = DBPerson(None, person)
        cursor.execute(statement(DBPerson.__insert__, self.db.place_holder),
                       (p.id, to_utf8(p.name).decode("utf-8"), email))
        person_id = p.id
    else:
        person_id = rs[0]

    profiler_stop("Ensuring person %s for repository %d",
                  (person.name, self.repo_id), True)

    return person_id
def __reader(self, templog, queue):
    """Producer thread body: push every commit stored in templog onto
    queue, visiting them in self.order."""
    printdbg("DBProxyContentHandler: thread __reader started")

    def enqueue(commit):
        # foreach() hands us one commit at a time; forward it as-is.
        queue.put(commit)

    templog.foreach(enqueue, self.order)
    printdbg("DBProxyContentHandler: thread __reader finished")
def _get_extensions_manager(extensions, hard_order=False):
    """Create and return an ExtensionsManager for `extensions`.

    Exits the process with status 1 when any requested extension is
    invalid.
    """
    try:
        printdbg("Starting ExtensionsManager")
        # Return directly; the intermediate variable added nothing.
        return ExtensionsManager(extensions, hard_order=hard_order)
    except InvalidExtension as e:
        # 'as e' replaces the Python-2-only 'InvalidExtension, e' form.
        printerr("Invalid extension %s", (e.name, ))
        sys.exit(1)
def __save_caches_to_disk(self):
    """Pickle the handler's lookup caches to self.cache_file."""
    printdbg("DBContentHandler: Saving caches to disk (%s)",
             (self.cache_file,))
    cache = [self.file_cache, self.moves_cache, self.deletes_cache,
             self.revision_cache, self.branch_cache, self.tags_cache,
             self.people_cache]
    # 'wb': protocol -1 is the highest (binary) pickle protocol; a
    # text-mode file would corrupt it on platforms that translate
    # newlines.  'with' closes the file even if dump fails.
    with open(self.cache_file, 'wb') as f:
        dump(cache, f, -1)
def __save_caches_to_disk (self):
    """Write the in-memory caches to self.cache_file with pickle."""
    printdbg ("DBContentHandler: Saving caches to disk (%s)", (self.cache_file,))
    cache = [self.file_cache, self.moves_cache, self.deletes_cache,
             self.revision_cache, self.branch_cache, self.tags_cache,
             self.people_cache]
    # Binary mode: dump() uses protocol -1, which is binary, so text
    # mode 'w' would break on newline-translating platforms.  The
    # context manager guarantees the file is closed on error.
    with open (self.cache_file, 'wb') as f:
        dump (cache, f, -1)
def _get_extensions_manager(extensions, hard_order=False):
    """Build the ExtensionsManager for the given extension list.

    Terminates the program (exit code 1) on an invalid extension name.
    """
    try:
        printdbg("Starting ExtensionsManager")
        emg = ExtensionsManager(extensions, hard_order=hard_order)
        return emg
    except InvalidExtension as e:
        # Modernized from the deprecated 'except InvalidExtension, e'.
        printerr("Invalid extension %s", (e.name,))
        sys.exit(1)
def __execute (self):
    """Run the current page of the stored query and advance the offset."""
    paged_query = "%s LIMIT %d OFFSET %d" % (self.query, self.interval_size, self.i)
    self.i += self.interval_size
    printdbg (paged_query)
    # Pass the bound arguments only when there are any.
    call_args = (paged_query, self.args) if self.args else (paged_query,)
    self.cursor.execute (*call_args)
    self.need_exec = False
def __execute(self):
    """Execute the windowed query and move the paging window forward."""
    offset = self.i
    self.i += self.interval_size
    paged = "%s LIMIT %d OFFSET %d" % (self.query, self.interval_size, offset)
    printdbg(paged)
    if self.args:
        self.cursor.execute(paged, self.args)
    else:
        self.cursor.execute(paged)
    self.need_exec = False
def statement(str, ph_mark):
    """Rewrite '?' placeholders in a SQL statement to ph_mark, leaving
    any '?' inside single-quoted literals untouched."""
    # Nothing to do when the target marker is '?' itself or the
    # statement carries no placeholder at all.
    if "?" == ph_mark or "?" not in str:
        printdbg(str)
        return str
    # Split on single quotes: even-indexed chunks are outside string
    # literals, so only those get their placeholders rewritten.
    chunks = str.split("'")
    rewritten = [chunk.replace("?", ph_mark) if i % 2 == 0 else chunk
                 for i, chunk in enumerate(chunks)]
    retval = "'".join(rewritten)
    printdbg(retval)
    return retval
def end(self):
    """Finish processing: flush pending inserts, persist the caches and
    close the database connection.  Order matters — inserts must reach
    the DB before the caches describing them are saved."""
    # flush pending inserts
    printdbg("DBContentHandler: flushing pending inserts")
    self.__insert_many()

    # Save the caches to disk
    profiler_start("Saving caches to disk")
    self.__save_caches_to_disk()
    profiler_stop("Saving caches to disk", delete=True)

    # Release DB resources and drop the reference to the closed
    # connection.
    self.cursor.close()
    self.cnn.close()
    self.cnn = None
def ensure_branch(branch):
    """Return the id of `branch` from the branches table, inserting a
    new row on first sight."""
    profiler_start("Ensuring branch %s for repository %d",
                   (branch, self.repo_id))
    printdbg("DBContentHandler: ensure_branch %s", (branch,))

    db_cursor = self.cursor
    db_cursor.execute(statement("SELECT id from branches where name = ?",
                                self.db.place_holder), (branch,))
    row = db_cursor.fetchone()
    if row:
        branch_id = row[0]
    else:
        # Not seen before: create the row and keep its generated id.
        new_branch = DBBranch(None, branch)
        db_cursor.execute(statement(DBBranch.__insert__,
                                    self.db.place_holder),
                          (new_branch.id, new_branch.name))
        branch_id = new_branch.id

    profiler_stop("Ensuring branch %s for repository %d",
                  (branch, self.repo_id), True)
    return branch_id
def ensure_tag(tag):
    """Return the id of `tag` from the tags table, creating the row
    when it does not exist yet."""
    profiler_start("Ensuring tag %s for repository %d",
                   (tag, self.repo_id))
    printdbg("DBContentHandler: ensure_tag %s", (tag,))

    db_cursor = self.cursor
    db_cursor.execute(statement("SELECT id from tags where name = ?",
                                self.db.place_holder), (tag,))
    row = db_cursor.fetchone()
    if row:
        tag_id = row[0]
    else:
        # First time this tag is seen: insert it.
        new_tag = DBTag(None, tag)
        db_cursor.execute(statement(DBTag.__insert__, self.db.place_holder),
                          (new_tag.id, new_tag.name))
        tag_id = new_tag.id

    profiler_stop("Ensuring tag %s for repository %d",
                  (tag, self.repo_id), True)
    return tag_id
def ensure_tag (tag):
    # Return the id of `tag` from the tags table, creating the row on
    # first sight.
    # NOTE(review): closure — uses self.cursor / self.db / self.repo_id
    # from the enclosing scope.
    profiler_start ("Ensuring tag %s for repository %d", (tag, self.repo_id))
    printdbg ("DBContentHandler: ensure_tag %s", (tag,))
    cursor = self.cursor

    cursor.execute (statement ("SELECT id from tags where name = ?", self.db.place_holder), (tag,))
    rs = cursor.fetchone ()
    if not rs:
        # Unknown tag: insert it and use the generated id.
        t = DBTag (None, tag)
        cursor.execute (statement (DBTag.__insert__, self.db.place_holder), (t.id, t.name))
        tag_id = t.id
    else:
        tag_id = rs[0]

    profiler_stop ("Ensuring tag %s for repository %d", (tag, self.repo_id), True)

    return tag_id
def __get_file_from_moves_cache(self, path):
    """Resolve `path` through the moves cache when it is absent from
    file_cache: repeatedly undo recorded moves of any of its prefixes
    until a cached path is reached.

    Raises FileNotInCache when no chain of moves leads to a cached
    path.
    """
    printdbg("DBContentHandler: looking for path %s in moves cache",
             (path,))

    candidate = path
    applied = []
    while candidate not in self.file_cache:
        progressed = False
        for moved_to in self.moves_cache.keys():
            # Each recorded move is applied at most once (tracked in
            # `applied`), and only when it prefixes the candidate.
            if not candidate.startswith(moved_to) or moved_to in applied:
                continue
            candidate = candidate.replace(moved_to,
                                          self.moves_cache[moved_to], 1)
            applied.append(moved_to)
            progressed = True
        if not progressed:
            # No applicable move left and still not in the cache.
            raise FileNotInCache

    return self.file_cache[candidate]
def __get_file_from_moves_cache (self, path):
    # Path is not in the cache, but it should
    # Look if any of its parents was moved
    printdbg ("DBContentHandler: looking for path %s in moves cache", (path,))

    current_path = path
    replaces = []
    while current_path not in self.file_cache:
        found = False
        for new_path in self.moves_cache.keys ():
            if not current_path.startswith (new_path) or new_path in replaces:
                continue
            # Undo the recorded move: rewrite the matching prefix once.
            # Each move is applied at most once (tracked in `replaces`).
            current_path = current_path.replace (new_path, self.moves_cache[new_path], 1)
            replaces.append (new_path)
            found = True
        if not found:
            # No applicable move left and still not cached: give up.
            raise FileNotInCache

    return self.file_cache[current_path]
def __convert_commit_actions(self, commit):
    # We detect here files that have been moved or
    # copied. Files moved are converted into a
    # single action of type 'V'. For copied files
    # we just change its actions type from 'A' to 'C'

    def find_action(actions, type, path):
        # Return the first action of the given type whose target (f1)
        # is `path`, or None when there is no such action.
        for action in actions:
            if action.type == type and action.f1 == path:
                return action

        return None

    # Delete-actions that got folded into a rename; removed at the end
    # so we don't mutate commit.actions while iterating it.
    remove_actions = []

    for action in commit.actions:
        if action.f2 is not None:
            # Move or copy action
            if action.type == 'A':
                del_action = find_action(commit.actions, 'D', action.f2)
                if del_action is not None and del_action \
                        not in remove_actions:
                    # Add + Delete of the same source = a rename.
                    # FIXME: See http://goo.gl/eymoH
                    printdbg("SVN Parser: File %s has been renamed to %s",
                             (action.f2, action.f1))
                    action.type = 'V'
                    remove_actions.append(del_action)
                else:
                    action.type = 'C'
                    printdbg("SVN Parser: File %s has been copied to %s",
                             (action.f2, action.f1))

                # Try to guess if it was a tag
                # Yes, with svn we are always guessing :-/
                tag = self.__guess_tag_from_path(action.f1)
                if tag is not None:
                    if commit.tags is None:
                        commit.tags = []
                    commit.tags.append(tag)
            elif action.type == 'R':
                # TODO
                printdbg("SVN Parser: File %s replaced to %s",
                         (action.f2, action.f1))
                pass

    # Drop the delete-halves of detected renames.
    for action in remove_actions:
        printdbg("SVN Parser: Removing action %s %s",
                 (action.type, action.f1))
        commit.actions.remove(action)
def do_delete(self, delete_statement, params=None,
              error_message="Delete failed, data needs manual cleanup"):
    """Run a best-effort delete statement for the current repository.

    Args:
        delete_statement: SQL with '?' placeholders.
        params: statement parameters; defaults to (self.repo_id,).
        error_message: message handed to execute_statement on failure.

    Returns True immediately when the repository was never found
    (self.repo_id is None); otherwise returns None.
    """
    if self.repo_id is None:
        # Repo wasn't found anyway, so continue
        return True

    # You can't reference instance variables in default
    # parameters, so I have to do this.
    if params is None:
        params = (self.repo_id,)

    # Create the cursor OUTSIDE the try: previously, if cursor() itself
    # raised, the finally clause hit the unbound local `delete_cursor`
    # and the resulting NameError masked the real error.
    delete_cursor = self.connection.cursor()
    try:
        execute_statement(statement(delete_statement,
                                    self.db.place_holder),
                          params, delete_cursor, self.db,
                          error_message)
    except Exception:
        # Deliberate best-effort: deletions are clean-up; failures are
        # only logged at debug level.
        printdbg("Deletion exception")
    finally:
        delete_cursor.close()
def ensure_path(path, commit_id):
    """Walk `path` component by component, making sure every prefix is
    registered (files/file_links rows), and return (node_id, parent_id)
    for the full path.  Results are memoized in self.file_cache.

    NOTE(review): closure — uses self.file_cache and private helpers
    from the enclosing scope.
    """
    profiler_start("Ensuring path %s for repository %d", (path, self.repo_id))
    printdbg("DBContentHandler: ensure_path %s", (path,))

    prefix, lpath = path.split("://", 1)
    prefix += "://"
    tokens = lpath.strip('/').split('/')

    parent = -1
    node_id = None
    for i, token in enumerate(tokens):
        rpath = prefix + '/' + '/'.join(tokens[:i + 1])
        if ":///" not in path:
            # If the repo paths don't start with /
            # remove it here
            rpath = rpath.replace(':///', '://')
        printdbg("DBContentHandler: rpath: %s", (rpath,))
        try:
            node_id, parent_id = self.file_cache[rpath]
            parent = node_id
            continue
        except KeyError:
            # Only a cache miss is expected here; the old bare
            # `except:` swallowed every error (even KeyboardInterrupt).
            pass

        # Rpath not in cache, add it
        node_id = self.__add_new_file_and_link(token, parent, commit_id)
        parent_id = parent
        parent = node_id

        # Also add to file_paths (strip the numeric scheme prefix)
        self.__add_file_path(commit_id, node_id,
                             re.sub(r'^\d+://', '', rpath))

        self.file_cache[rpath] = (node_id, parent_id)

    assert node_id is not None

    printdbg("DBContentHandler: path ensured %s = %d (%d)",
             (path, node_id, parent_id))
    profiler_stop("Ensuring path %s for repository %d",
                  (path, self.repo_id), True)

    return node_id, parent_id
def end(self):
    # The log is now in the temp table
    # Retrieve the data now and pass it to
    # the real content handler
    self.templog.flush()

    printdbg("DBProxyContentHandler: parsing finished, creating thread")
    self.db_handler.begin()
    self.db_handler.repository(self.repo_uri)

    # Producer thread: __reader drains the temp log into the bounded
    # queue while this thread consumes it.
    queue = AsyncQueue(50)
    reader_thread = threading.Thread(target=self.__reader,
                                     args=(self.templog, queue))
    reader_thread.setDaemon(True)
    reader_thread.start()

    # Use the queue with mutexes while the
    # thread is alive
    while reader_thread.isAlive():
        try:
            # 1-second timeout so we re-check the thread's liveness.
            item = queue.get(1)
        except TimeOut:
            continue
        printdbg("DBProxyContentHandler: commit: %s", (item.revision, ))
        self.db_handler.commit(item)
        del item

    # No threads now, we don't need locks
    printdbg(
        "DBProxyContentHandler: thread __reader is finished, continue without locks"
    )
    while not queue.empty_unlocked():
        item = queue.get_unlocked()
        self.db_handler.commit(item)
        del item

    self.db_handler.end()
    self.templog.clear()
def end(self):
    # The log is now in the temp table
    # Retrieve the data now and pass it to
    # the real content handler
    self.templog.flush()

    printdbg("DBProxyContentHandler: parsing finished, creating thread")
    self.db_handler.begin()
    self.db_handler.repository(self.repo_uri)

    # Producer thread: __reader feeds commits from the temp log into
    # the bounded queue.
    queue = AsyncQueue(50)
    reader_thread = threading.Thread(target=self.__reader,
                                     args=(self.templog, queue))
    reader_thread.setDaemon(True)
    reader_thread.start()

    # Use the queue with mutexes while the
    # thread is alive
    while reader_thread.isAlive():
        try:
            # Timed get so we can re-check whether the reader is done.
            item = queue.get(1)
        except TimeOut:
            continue
        printdbg("DBProxyContentHandler: commit: %s", (item.revision,))
        self.db_handler.commit(item)
        del item

    # No threads now, we don't need locks
    printdbg("DBProxyContentHandler: thread __reader is finished, " + \
             "continue without locks")
    while not queue.empty_unlocked():
        item = queue.get_unlocked()
        self.db_handler.commit(item)
        del item

    self.db_handler.end()
    self.templog.clear()
def create_tables(self, cursor):
    """Create the SQLite schema: all tables, indexes and views.

    Raises TableAlreadyExists when any table is already present
    (sqlite reports it as an OperationalError).
    """
    import sqlite3.dbapi2

    # All DDL statements, executed uniformly below.  The previous
    # version repeated cursor.execute() per statement and ended with a
    # redundant `except: raise` clause (a no-op), now removed.
    ddl = (
        """CREATE TABLE repositories (
            id integer primary key,
            uri varchar,
            name varchar,
            type varchar
        )""",
        """CREATE TABLE people (
            id integer primary key,
            name varchar,
            email varchar
        )""",
        """CREATE TABLE scmlog (
            id integer primary key,
            rev varchar,
            committer_id integer,
            author_id integer,
            date datetime,
            message varchar,
            composed_rev bool,
            repository_id integer
        )""",
        """CREATE TABLE actions (
            id integer primary key,
            type varchar(1),
            file_id integer,
            commit_id integer,
            branch_id integer
        )""",
        """CREATE TABLE file_copies (
            id integer primary key,
            to_id integer,
            from_id integer,
            from_commit_id integer,
            new_file_name varchar,
            action_id integer
        )""",
        """CREATE TABLE branches (
            id integer primary key,
            name varchar
        )""",
        """CREATE TABLE files (
            id integer primary key,
            file_name varchar(255),
            repository_id integer
        )""",
        """CREATE TABLE file_links (
            id integer primary key,
            parent_id integer,
            file_id integer,
            commit_id integer
        )""",
        """CREATE TABLE file_paths (
            id integer primary key,
            commit_id integer,
            file_id integer,
            file_path varchar(255)
        )""",
        """CREATE TABLE tags (
            id integer primary key,
            name varchar
        )""",
        """CREATE TABLE tag_revisions (
            id integer primary key,
            tag_id integer,
            commit_id integer
        )""",
        "CREATE index files_file_name on files(file_name)",
        "CREATE index scmlog_date on scmlog(date)",
        "CREATE index scmlog_repo on scmlog(repository_id)",
    )

    try:
        for sql in ddl:
            cursor.execute(sql)
        self._create_views(cursor)
    except sqlite3.dbapi2.OperationalError as e:
        printdbg("Exception creating SQLite tables: " + str(e))
        raise TableAlreadyExists
def __init__ (self):
    """Build the Bicho driver: read the configuration and instantiate
    the backend it selects."""
    opts = Config ()
    self.backend = create_backend (opts.type)
    printdbg ("Bicho object created, options and backend initialized")
def _parse_line (self, line):
    """Dispatch one line of `git log --decorate` output.

    Mutates parser state (self.commit, self.branch, self.branches) and
    forwards completed objects to self.handler.  Each recognised line
    kind (commit header, committer, author, date, file action, move)
    handles the line and returns; anything unmatched is appended to the
    current commit message.
    """
    if line is None or line == '':
        return

    # Ignore
    for patt in self.patterns['ignore']:
        if patt.match (line):
            return

    # Commit
    match = self.patterns['commit'].match (line)
    if match:
        # Flush the previous commit (head of the current branch) before
        # starting a new one.
        if self.commit is not None and self.branch.is_remote ():
            if self.branch.tail.svn_tag is None: # Skip commits on svn tags
                self.handler.commit (self.branch.tail.commit)
        self.commit = Commit ()
        self.commit.revision = match.group (1)

        parents = match.group (3)
        if parents:
            parents = parents.split ()
        git_commit = self.GitCommit (self.commit, parents)

        decorate = match.group (5)
        branch = None
        if decorate:
            # Remote branch
            m = re.search (self.patterns['branch'], decorate)
            if m:
                branch = self.GitBranch (self.GitBranch.REMOTE, m.group (1), git_commit)
                printdbg ("Branch '%s' head at acommit %s", (branch.name, self.commit.revision))
            else:
                # Local Branch
                m = re.search (self.patterns['local-branch'], decorate)
                if m:
                    branch = self.GitBranch (self.GitBranch.LOCAL, m.group (1), git_commit)
                    printdbg ("Commit %s on local branch '%s'", (self.commit.revision, branch.name))
                    # If local branch was merged we just ignore this decoration
                    if self.branch and self.branch.is_my_parent (git_commit):
                        printdbg ("Local branch '%s' was merged", (branch.name,))
                        branch = None
                else:
                    # Stash
                    m = re.search (self.patterns['stash'], decorate)
                    if m:
                        branch = self.GitBranch (self.GitBranch.STASH, "stash", git_commit)
                        printdbg ("Commit %s on stash", (self.commit.revision,))
            # Tag
            m = re.search (self.patterns['tag'], decorate)
            if m:
                self.commit.tags = [m.group (1)]
                printdbg ("Commit %s tagged as '%s'", (self.commit.revision, self.commit.tags[0]))

        if branch is not None and self.branch is not None:
            # Detect empty branches. Ideally, the head of a branch
            # can't have children. When this happens is because the
            # branch is empty, so we just ignore such branch
            if self.branch.is_my_parent (git_commit):
                printout ("Warning: Detected empty branch '%s', it'll be ignored", (branch.name,))
                branch = None

        if len (self.branches) >= 2:
            # If current commit is the start point of a new branch
            # we have to look at all the current branches since
            # we haven't inserted the new branch yet.
            # If not, look at all other branches excluding the current one
            for i, b in enumerate (self.branches):
                if i == 0 and branch is None:
                    continue
                if b.is_my_parent (git_commit):
                    # We assume current branch is always the last one
                    # AFAIK there's no way to make sure this is right
                    printdbg ("Start point of branch '%s' at commit %s",
                              (self.branches[0].name, self.commit.revision))
                    self.branches.pop (0)
                    self.branch = b

        if self.branch and self.branch.tail.svn_tag is not None and self.branch.is_my_parent (git_commit):
            # There's a pending tag in previous commit
            pending_tag = self.branch.tail.svn_tag
            printdbg ("Move pending tag '%s' from previous commit %s to current %s",
                      (pending_tag, self.branch.tail.commit.revision, self.commit.revision))
            if self.commit.tags and pending_tag not in self.commit.tags:
                self.commit.tags.append (pending_tag)
            else:
                self.commit.tags = [pending_tag]
            self.branch.tail.svn_tag = None

        if branch is not None:
            self.branch = branch

            # Insert master always at the end
            if branch.is_remote () and branch.name == 'master':
                self.branches.append (self.branch)
            else:
                self.branches.insert (0, self.branch)
        else:
            self.branch.set_tail (git_commit)

        return

    # Committer
    match = self.patterns['committer'].match (line)
    if match:
        self.commit.committer = Person ()
        self.commit.committer.name = match.group (1)
        self.commit.committer.email = match.group (2)
        self.handler.committer (self.commit.committer)

        return

    # Author
    match = self.patterns['author'].match (line)
    if match:
        self.commit.author = Person ()
        self.commit.author.name = match.group (1)
        self.commit.author.email = match.group (2)
        self.handler.author (self.commit.author)

        return

    # Date
    match = self.patterns['date'].match (line)
    if match:
        self.commit.date = datetime.datetime (* (time.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")[0:6]))
        # datetime.datetime.strptime not supported by Python2.4
        #self.commit.date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

        return

    # File
    match = self.patterns['file'].match (line)
    if match:
        action = Action ()
        action.type = match.group (1)
        action.f1 = match.group (2)
        self.commit.actions.append (action)
        self.handler.file (action.f1)

        return

    # File moved/copied
    match = self.patterns['file-moved'].match (line)
    if match:
        action = Action ()
        type = match.group (1)
        if type == 'R':
            # Renames are stored as 'V' actions
            action.type = 'V'
        else:
            action.type = type
        action.f1 = match.group (3)
        action.f2 = match.group (2)
        action.rev = self.commit.revision
        self.commit.actions.append (action)
        self.handler.file (action.f1)

        return

    # This is a workaround for a bug in the GNOME Git migration
    # There are commits on tags not correctly detected like this one:
    # http://git.gnome.org/cgit/evolution/commit/?id=b8e52acac2b9fc5414a7795a73c74f7ee4eeb71f
    # We want to ignore commits on tags since it doesn't make any sense in Git
    if self.is_gnome:
        match = self.patterns['svn-tag'].match (line.strip ())
        if match:
            printout ("Warning: detected a commit on a svn tag: %s", (match.group (0),))
            tag = match.group (1)
            if self.commit.tags and tag in self.commit.tags:
                # The commit will be ignored, so move the tag
                # to the next (previous in history) commit
                self.branch.tail.svn_tag = tag

    # Message
    self.commit.message += line + '\n'

    assert True, "Not match for line %s" % (line)
def _parse_line(self, line):
    """Dispatch one line of `git log --decorate` output.

    Mutates parser state (self.commit, self.branch, self.branches) and
    forwards completed objects to self.handler.  Replacement commits
    and (optionally) merge commits are skipped by setting self.commit
    to None, which short-circuits the remaining line kinds.
    """
    if line is None or line == '':
        return

    # Ignore
    for patt in self.patterns['ignore']:
        if patt.match(line):
            return

    # Commit
    match = self.patterns['commit'].match(line)
    if match:
        if self.commit is not None:
            # Skip commits on svn tags
            if self.branch.tail.svn_tag is None:
                self.handler.commit(self.branch.tail.commit)

        if self.patterns['replace-commit'].search(line):
            printdbg("Skipping commit, because it's a replacement")
            self.commit = None
            return

        self.commit = Commit()
        self.commit.revision = match.group(1)

        parents = match.group(3)
        if parents:
            parents = parents.split()
        git_commit = self.GitCommit(self.commit, parents)

        # If a specific branch has been configured, there
        # won't be any decoration, so a branch needs to be
        # created
        if Config().branch is not None:
            self.branch = self.GitBranch(self.GitBranch.LOCAL,
                                         Config().branch, git_commit)

        decorate = match.group(5)
        branch = None
        if decorate:
            # Remote branch
            m = re.search(self.patterns['branch'], decorate)
            if m:
                branch = self.GitBranch(self.GitBranch.REMOTE, m.group(2),
                                        git_commit)
                printdbg("Branch '%s' head at acommit %s",
                         (branch.name, self.commit.revision))
            else:
                # Local Branch
                m = re.search(self.patterns['local-branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.LOCAL,
                                            m.group(1), git_commit)
                    printdbg("Commit %s on local branch '%s'",
                             (self.commit.revision, branch.name))
                    # If local branch was merged we just ignore this
                    # decoration
                    if self.branch and \
                            self.branch.is_my_parent(git_commit):
                        printdbg("Local branch '%s' was merged",
                                 (branch.name, ))
                        branch = None
                else:
                    # Stash
                    m = re.search(self.patterns['stash'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.STASH,
                                                "stash", git_commit)
                        printdbg("Commit %s on stash",
                                 (self.commit.revision, ))
            # Tag
            m = re.search(self.patterns['tag'], decorate)
            if m:
                self.commit.tags = [m.group(1)]
                printdbg("Commit %s tagged as '%s'",
                         (self.commit.revision, self.commit.tags[0]))

        if branch is not None and self.branch is not None:
            # Detect empty branches. Ideally, the head of a branch
            # can't have children. When this happens is because the
            # branch is empty, so we just ignore such branch
            if self.branch.is_my_parent(git_commit):
                printout("Warning: Detected empty branch '%s', " + \
                         "it'll be ignored", (branch.name,))
                branch = None

        if len(self.branches) >= 2:
            # If current commit is the start point of a new branch
            # we have to look at all the current branches since
            # we haven't inserted the new branch yet.
            # If not, look at all other branches excluding the current one
            for i, b in enumerate(self.branches):
                if i == 0 and branch is None:
                    continue
                if b.is_my_parent(git_commit):
                    # We assume current branch is always the last one
                    # AFAIK there's no way to make sure this is right
                    printdbg("Start point of branch '%s' at commit %s",
                             (self.branches[0].name, self.commit.revision))
                    self.branches.pop(0)
                    self.branch = b

        if self.branch and self.branch.tail.svn_tag is not None and \
                self.branch.is_my_parent(git_commit):
            # There's a pending tag in previous commit
            pending_tag = self.branch.tail.svn_tag
            printdbg("Move pending tag '%s' from previous commit %s " + \
                     "to current %s", (pending_tag,
                                       self.branch.tail.commit.revision,
                                       self.commit.revision))
            if self.commit.tags and pending_tag not in self.commit.tags:
                self.commit.tags.append(pending_tag)
            else:
                self.commit.tags = [pending_tag]
            self.branch.tail.svn_tag = None

        if branch is not None:
            self.branch = branch

            # Insert master always at the end
            if branch.is_remote() and branch.name == 'master':
                self.branches.append(self.branch)
            else:
                self.branches.insert(0, self.branch)
        else:
            self.branch.set_tail(git_commit)

        if parents and len(parents) > 1 and not Config().analyze_merges:
            #Skip merge commits
            self.commit = None

        return
    elif self.commit is None:
        # Current commit is being skipped: ignore its detail lines.
        return

    # Committer
    match = self.patterns['committer'].match(line)
    if match:
        self.commit.committer = Person()
        self.commit.committer.name = match.group(1)
        self.commit.committer.email = match.group(2)
        self.handler.committer(self.commit.committer)

        return

    # Author
    match = self.patterns['author'].match(line)
    if match:
        self.commit.author = Person()
        self.commit.author.name = match.group(1)
        self.commit.author.email = match.group(2)
        self.handler.author(self.commit.author)

        return

    # Commit Date
    match = self.patterns['commit-date'].match(line)
    if match:
        self.commit.commit_date = datetime.datetime(*(time.strptime(\
            match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))

        return

    # Author Date
    match = self.patterns['author-date'].match(line)
    if match:
        self.commit.author_date = datetime.datetime(*(time.strptime(\
            match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))

        return

    # File
    match = self.patterns['file'].match(line)
    if match:
        action = Action()
        action.type = match.group(1)
        action.f1 = match.group(2)
        self.commit.actions.append(action)
        self.handler.file(action.f1)

        return

    # File moved/copied
    match = self.patterns['file-moved'].match(line)
    if match:
        action = Action()
        type = match.group(1)
        if type == 'R':
            # Renames are stored as 'V' actions
            action.type = 'V'
        else:
            action.type = type
        action.f1 = match.group(3)
        action.f2 = match.group(2)
        action.rev = self.commit.revision
        self.commit.actions.append(action)
        self.handler.file(action.f1)

        return

    # Message
    self.commit.message += line + '\n'

    assert True, "Not match for line %s" % (line)
def _parse_line(self, line):
    """Dispatch one line of `git log --decorate` output.

    Mutates parser state (self.commit, self.branch, self.branches) and
    forwards completed objects to self.handler.  This variant also
    records commit parents, timezone offsets and merge-action types,
    and falls back to a synthetic "(no-branch)" local branch when no
    decoration was seen.
    """
    if line is None or line == '':
        return

    # Ignore
    for patt in self.patterns['ignore']:
        if patt.match(line):
            return

    # Commit
    match = self.patterns['commit'].match(line)
    if match:
        # Flush the previous commit before starting a new one.
        if self.commit is not None and self.branch is not None:
            if self.branch.tail.svn_tag is None:  # Skip commits on svn tags
                self.handler.commit(self.branch.tail.commit)
        self.commit = Commit()
        self.commit.revision = match.group(1)

        parents = match.group(3)
        if parents:
            parents = parents.split()
            self.commit.parents = parents
        git_commit = self.GitCommit(self.commit, parents)

        decorate = match.group(5)
        branch = None
        if decorate:
            # Remote branch
            m = re.search(self.patterns['branch'], decorate)
            if m:
                branch = self.GitBranch(self.GitBranch.REMOTE, m.group(1),
                                        git_commit)
                printdbg("Branch '%s' head at acommit %s",
                         (branch.name, self.commit.revision))
            else:
                # Local Branch
                m = re.search(self.patterns['local-branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.LOCAL,
                                            m.group(1), git_commit)
                    printdbg("Commit %s on local branch '%s'",
                             (self.commit.revision, branch.name))
                    # If local branch was merged we just ignore this decoration
                    if self.branch and self.branch.is_my_parent(git_commit):
                        printdbg("Local branch '%s' was merged",
                                 (branch.name,))
                        branch = None
                else:
                    # Stash
                    m = re.search(self.patterns['stash'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.STASH,
                                                "stash", git_commit)
                        printdbg("Commit %s on stash",
                                 (self.commit.revision,))
            # Tag
            m = re.search(self.patterns['tag'], decorate)
            if m:
                self.commit.tags = [m.group(1)]
                printdbg("Commit %s tagged as '%s'",
                         (self.commit.revision, self.commit.tags[0]))

        # No decoration and no current branch: synthesize a local one.
        if not branch and not self.branch:
            branch = self.GitBranch(self.GitBranch.LOCAL, "(no-branch)",
                                    git_commit)
            printdbg("Commit %s on unknown local branch '%s'",
                     (self.commit.revision, branch.name))

        # This part of code looks wired at first time so here is a small description what it does:
        #
        # * self.branch is the branch to which the last inspected commit belonged to
        # * branch is the branch of the current parsed commit
        #
        # This check is only to find branches which are fully merged into a already analyzed branch
        #
        # For more detailed information see https://github.com/MetricsGrimoire/CVSAnalY/issues/64
        if branch is not None and self.branch is not None:
            # Detect empty branches.
            # Ideally, the head of a branch can't have children.
            # When this happens is because the branch is empty, so we just ignore such branch.
            if self.branch.is_my_parent(git_commit):
                printout(
                    "Info: Branch '%s' will be ignored, because it was already merged in an active one.",
                    (branch.name,)
                )
                branch = None

        if len(self.branches) >= 2:
            # If current commit is the start point of a new branch
            # we have to look at all the current branches since
            # we haven't inserted the new branch yet.
            # If not, look at all other branches excluding the current one
            for i, b in enumerate(self.branches):
                if i == 0 and branch is None:
                    continue
                if b.is_my_parent(git_commit):
                    # We assume current branch is always the last one
                    # AFAIK there's no way to make sure this is right
                    printdbg("Start point of branch '%s' at commit %s",
                             (self.branches[0].name, self.commit.revision))
                    self.branches.pop(0)
                    self.branch = b

        if self.branch and self.branch.tail.svn_tag is not None and self.branch.is_my_parent(git_commit):
            # There's a pending tag in previous commit
            pending_tag = self.branch.tail.svn_tag
            printdbg("Move pending tag '%s' from previous commit %s to current %s",
                     (pending_tag, self.branch.tail.commit.revision,
                      self.commit.revision))
            if self.commit.tags and pending_tag not in self.commit.tags:
                self.commit.tags.append(pending_tag)
            else:
                self.commit.tags = [pending_tag]
            self.branch.tail.svn_tag = None

        if branch is not None:
            self.branch = branch

            # Insert master always at the end
            if branch.name == 'master':
                self.branches.append(self.branch)
            else:
                self.branches.insert(0, self.branch)
        else:
            if self.branch is not None:
                self.branch.set_tail(git_commit)

        return

    # Committer
    match = self.patterns['committer'].match(line)
    if match:
        self.commit.committer = Person()
        self.commit.committer.name = match.group(1)
        self.commit.committer.email = match.group(2)
        self.handler.committer(self.commit.committer)

        return

    # Author
    match = self.patterns['author'].match(line)
    if match:
        self.commit.author = Person()
        self.commit.author.name = match.group(1)
        self.commit.author.email = match.group(2)
        self.handler.author(self.commit.author)

        return

    # Commit date
    match = self.patterns['date'].match(line)
    if match:
        self.commit.date = datetime.datetime(
            *(time.strptime(match.group(1).strip(" "),
                            "%a %b %d %H:%M:%S %Y")[0:6]))
        # datetime.datetime.strptime not supported by Python2.4
        #self.commit.date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

        # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
        # This string will be parsed to int and recalculated into seconds (60 * 60)
        self.commit.date_tz = (((int(match.group(2))) * 60 * 60) / 100)

        return

    # Author date
    match = self.patterns['author_date'].match(line)
    if match:
        self.commit.author_date = datetime.datetime(
            *(time.strptime(match.group(1).strip(" "),
                            "%a %b %d %H:%M:%S %Y")[0:6]))
        # datetime.datetime.strptime not supported by Python2.4
        #self.commit.author_date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

        # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
        # This string will be parsed to int and recalculated into seconds (60 * 60)
        self.commit.author_date_tz = (((int(match.group(2))) * 60 * 60) / 100)

        return

    # File
    match = self.patterns['file'].match(line)
    if match:
        action = Action()
        type = match.group(1)
        if len(type) > 1:
            # merge actions
            if 'M' in type:
                type = 'M'
            else:
                # ignore merge actions without 'M'
                return
        action.type = type
        action.f1 = match.group(2)
        self.commit.actions.append(action)
        self.handler.file(action.f1)

        return

    # File moved/copied
    match = self.patterns['file-moved'].match(line)
    if match:
        action = Action()
        type = match.group(1)
        if type == 'R':
            # Renames are stored as 'V' actions
            action.type = 'V'
        else:
            action.type = type
        action.f1 = match.group(3)
        action.f2 = match.group(2)
        action.rev = self.commit.revision
        self.commit.actions.append(action)
        self.handler.file(action.f1)

        return

    # This is a workaround for a bug in the GNOME Git migration
    # There are commits on tags not correctly detected like this one:
    # http://git.gnome.org/cgit/evolution/commit/?id=b8e52acac2b9fc5414a7795a73c74f7ee4eeb71f
    # We want to ignore commits on tags since it doesn't make any sense in Git
    if self.is_gnome:
        match = self.patterns['svn-tag'].match(line.strip())
        if match:
            printout("Warning: detected a commit on a svn tag: %s",
                     (match.group(0),))
            tag = match.group(1)
            if self.commit.tags and tag in self.commit.tags:
                # The commit will be ignored, so move the tag
                # to the next (previous in history) commit
                self.branch.tail.svn_tag = tag

    # Message
    self.commit.message += line + '\n'

    assert True, "Not match for line %s" % (line)
# Note: Default values for options are defined on # configuration module usage = 'Usage: %prog [options]' try: Config.set_config_options(usage) except (ErrorLoadingConfig, InvalidConfig), e: printerr(str(e)) sys.exit(2) try: backend = Backend.create_backend(Config.backend) except ImportError, e: printerr("Backend ''" + Config.backend + "'' not exists. " + str(e)) sys.exit(2) printdbg("Bicho object created, options and backend initialized") backend.run() if Config.logtable: try: ilogger = IssueLogger.create_logger(Config.backend) except ImportError, e: printerr("Logger ''" + Config.backend + "'' doesn't exist. " + str(e)) sys.exit(2) printdbg("Bicho logger object created") ilogger.run() if __name__ == "__main__": main()
def __get_file_for_path(self, path, commit_id, old=False):
    """Return the (node_id, parent_id) pair for a repository path.

    Lookup order: file_cache first, then the moves cache, then
    (only when old is True) the deletes cache.  When the path is
    not cached at all, every missing component is created and
    linked in the database.

    Args:
        path: branch-prefixed path ("<branch_id>://<path>")
        commit_id: id of the commit being processed
        old: True when the path comes from the "from" side of a
            two-path action, so it might have been deleted/replaced
    """
    def ensure_path(path, commit_id):
        # Walk the path component by component, reusing nodes found
        # in file_cache and creating the missing ones in the database.
        profiler_start("Ensuring path %s for repository %d",
                       (path, self.repo_id))
        printdbg("DBContentHandler: ensure_path %s", (path,))

        prefix, lpath = path.split("://", 1)
        prefix += "://"
        tokens = lpath.strip('/').split('/')

        parent = -1
        node_id = None
        for i, token in enumerate(tokens):
            rpath = prefix + '/' + '/'.join(tokens[:i + 1])
            if not ":///" in path:
                # If the repo paths don't start with /
                # remove it here
                rpath = rpath.replace(':///', '://')
            printdbg("DBContentHandler: rpath: %s", (rpath,))
            try:
                node_id, parent_id = self.file_cache[rpath]
                parent = node_id
                continue
            except KeyError:
                # Was a bare "except:"; only a cache miss is expected
                # here, anything else should propagate.
                pass

            # Rpath not in cache, add it
            node_id = self.__add_new_file_and_link(token, parent, commit_id)
            parent_id = parent
            parent = node_id

            # Also add to file_paths (strip the "<branch_id>://" prefix)
            self.__add_file_path(commit_id, node_id,
                                 re.sub(r'^\d+://', '', rpath))

            self.file_cache[rpath] = (node_id, parent_id)

        assert node_id is not None

        printdbg("DBContentHandler: path ensured %s = %d (%d)",
                 (path, node_id, parent_id))
        profiler_stop("Ensuring path %s for repository %d",
                      (path, self.repo_id), True)

        return node_id, parent_id

    printdbg("DBContentHandler: Looking for path %s in cache", (path,))

    # First of all look at the cache
    try:
        return self.file_cache[path]
    except KeyError:
        pass

    # It's not in the cache, look now at the moves cache
    try:
        retval = self.__get_file_from_moves_cache(path)
        printdbg("DBContentHandler: Found %s in moves cache", (path,))
        self.file_cache[path] = retval
        return retval
    except FileNotInCache:
        pass

    # If it's an old file (that is, the path has been
    # taken from the "from" part of an action that
    # has two paths) it might be deleted or replaced
    if old:
        try:
            return self.deletes_cache[path]
        except KeyError:
            pass

    # It hasn't been moved (or any of its parents)
    # so it was copied at some point
    return ensure_path(path, commit_id)
def _parse_line(self, line):
    """Feed one line of svn log output into the parser state machine.

    The parser moves between SVNParser.COMMIT, SVNParser.FILES and
    SVNParser.MESSAGE states.  A finished commit is flushed to
    self.handler when the closing separator line is seen.
    """
    # Blank line: transitions COMMIT/FILES -> MESSAGE, or keeps
    # accumulating empty message lines while in MESSAGE state.
    if not line:
        if self.commit is not None and self.state == SVNParser.COMMIT \
           or self.state == SVNParser.FILES:
            self.state = SVNParser.MESSAGE
        elif self.state == SVNParser.MESSAGE:
            self.__append_message_line()
        return

    # Message: while message lines remain, every line belongs to it
    if self.state == SVNParser.MESSAGE and self.msg_lines > 0:
        self.__append_message_line(line)
        return

    # Invalid commit. Some svn repos like asterisk have commits like this:
    # r176840 | (no author) | (no date) | 1 line
    # without any changed path, so I think we can just ignore them
    if self.patterns['invalid'].match(line):
        printdbg("SVN Parser: skipping invalid commit: %s", (line,))
        self.state = SVNParser.COMMIT
        self.commit = None
        return

    # Separator: closes the current commit (if any) and hands it over
    if self.patterns['separator'].match(line):
        if self.commit is None or self.state == SVNParser.COMMIT:
            return
        elif self.state == SVNParser.MESSAGE \
                or self.state == SVNParser.FILES:
            # We can go directly from FILES to COMMIT
            # when there is an empty log message
            if self.msg_lines > 0:
                printout("Warning (%d): parsing svn log, missing " + \
                         "lines in commit message!", (self.n_line,))

            self.__convert_commit_actions(self.commit)
            self.handler.commit(self.commit)
            self.state = SVNParser.COMMIT
            self.commit = None
            self.msg_lines = 0
        else:
            printout("Warning (%d): parsing svn log, unexpected separator",
                     (self.n_line,))
        return

    # Commit header: "rN | author | date | N lines"
    match = self.patterns['commit'].match(line)
    if match and self.state == SVNParser.COMMIT:
        commit = Commit()
        commit.revision = match.group(1)

        commit.committer = Person()
        commit.committer.name = match.group(2)

        # NOTE(review): stored in commit.date here; a sibling version of
        # this parser stores commit.commit_date — confirm which attribute
        # downstream handlers expect.
        commit.date = datetime.datetime(int(match.group(3)),
                                        int(match.group(4)),
                                        int(match.group(5)),
                                        int(match.group(6)),
                                        int(match.group(7)),
                                        int(match.group(8)))
        # group(10) is the declared number of message lines
        self.msg_lines = int(match.group(10))
        self.commit = commit
        self.handler.committer(commit.committer)
        return
    elif match and self.state == SVNParser.MESSAGE:
        # It seems a piece of a log message has been copied as
        # part of the commit message
        self.commit.message += line + '\n'
        return
    elif match and self.state != SVNParser.COMMIT:
        printout("Warning (%d): parsing svn log, unexpected line %s",
                 (self.n_line, line))
        return

    # Files: the "paths" marker switches us into FILES state
    if self.state == SVNParser.COMMIT:
        if self.patterns['paths'].match(line):
            self.state = SVNParser.FILES
        else:
            printout("Warning(%d): parsing svn log, unexpected line %s",
                     (self.n_line, line))
        return

    # File moved/copied/replaced (two-path action with a source revision)
    match = self.patterns['file-moved'].match(line)
    if match:
        if self.state != SVNParser.FILES:
            printout("Warning (%d): parsing svn log, unexpected line %s",
                     (self.n_line, line))
            return

        action = Action()
        action.type = match.group(1)
        action.f1 = match.group(2)
        action.f2 = match.group(3)
        action.rev = match.group(4)

        action.branch_f1 = self.__guess_branch_from_path(action.f1)
        action.branch_f2 = self.__guess_branch_from_path(action.f2)

        self.commit.actions.append(action)
        self.handler.file(action.f1)

        return

    # File (single-path action)
    match = self.patterns['file'].match(line)
    if match:
        if self.state != SVNParser.FILES:
            printout("Warning (%d): parsing svn log, unexpected line %s",
                     (self.n_line, line))
            return

        path = match.group(2)
        if path != '/':
            # path == '/' is probably a properties change in /
            # not interesting for us, ignoring
            action = Action()
            action.type = match.group(1)
            action.f1 = path

            action.branch_f1 = self.__guess_branch_from_path(path)

            self.commit.actions.append(action)
            self.handler.file(path)
        return
def __get_file_for_path(self, path, commit_id, old=True):
    """Return the (node_id, parent_id) pair for a repository path.

    Lookup order: file_cache first, then the moves cache, then
    (when old is True) the deletes cache.  When the path is not
    cached at all, every missing component is created and linked
    in the database.

    Args:
        path: branch-prefixed path ("<branch_id>://<path>")
        commit_id: id of the commit being processed
        old: when True the deletes cache is consulted; pass False
            only when the file is known to be new (an 'A' action)
    """
    def ensure_path(path, commit_id):
        # Walk the path component by component, reusing nodes found
        # in file_cache and creating the missing ones in the database.
        profiler_start("Ensuring path %s for repository %d",
                       (path, self.repo_id))
        printdbg("DBContentHandler: ensure_path %s", (path,))

        prefix, lpath = path.split("://", 1)
        prefix += "://"
        tokens = lpath.strip('/').split('/')

        parent = -1
        node_id = None
        for i, token in enumerate(tokens):
            file_path = '/'.join(tokens[:i + 1])
            rpath = prefix + '/' + file_path
            if not ":///" in path:
                # If the repo paths don't start with /
                # remove it here
                rpath = rpath.replace(':///', '://')
            printdbg("DBContentHandler: rpath: %s", (rpath,))
            try:
                node_id, parent_id = self.file_cache[rpath]
                parent = node_id
                continue
            except KeyError:
                # Was a bare "except:"; only a cache miss is expected
                # here, anything else should propagate.
                pass

            # Rpath not in cache, add it
            node_id = self.__add_new_file_and_link(token, parent,
                                                   commit_id, file_path)
            parent_id = parent
            parent = node_id
            self.file_cache[rpath] = (node_id, parent_id)

        assert node_id is not None

        printdbg("DBContentHandler: path ensured %s = %d (%d)",
                 (path, node_id, parent_id))
        profiler_stop("Ensuring path %s for repository %d",
                      (path, self.repo_id), True)

        return node_id, parent_id

    printdbg("DBContentHandler: Looking for path %s in cache", (path,))

    # First of all look at the cache
    try:
        return self.file_cache[path]
    except KeyError:
        pass

    # It's not in the cache, look now at the moves cache
    try:
        retval = self.__get_file_from_moves_cache(path)
        printdbg("DBContentHandler: Found %s in moves cache", (path,))
        self.file_cache[path] = retval
        return retval
    except FileNotInCache:
        pass

    # Due to branching, the file may be deleted in other branches,
    # and thus in deletes_cache. Unless in A action when we are
    # pretty sure that it is a new file, we should always look
    # at the deletes_cache for file_id
    if old and path in self.deletes_cache:
        return self.deletes_cache[path]

    # It hasn't been moved (or any of its parents)
    # so it was copied at some point
    return ensure_path(path, commit_id)
def backout_extensions(self, repo, uri, db): printdbg("Called backout extensions") self.run_extensions(repo, uri, db, backout=True)
config.bug_fix_regexes_case_sensitive = \ bug_fix_regexes_case_sensitive if not config.extensions and config.no_parse: # Do nothing!!! return 0 if config.debug: import repositoryhandler repositoryhandler.backends.DEBUG = True path = uri_to_filename(uri) (uri, repo) = _get_uri_and_repo(path) if not config.no_parse: printdbg("Preparing logging") # Create reader reader = LogReader() reader.set_repo(repo, path or uri) reader.set_branch(config.branch) # Create parser if config.repo_logfile is not None: parser = create_parser_from_logfile(config.repo_logfile) reader.set_logfile(config.repo_logfile) else: parser = _get_parser_from_repository(repo) parser.set_repository(repo, uri) if parser is None:
""" # Note: Default values for options are defined in # configuration module usage = 'Usage: %prog [options]' try: Config.set_config_options(usage) except (ErrorLoadingConfig, InvalidConfig), e: printerr(str(e)) sys.exit(2) try: backend = Backend.create_backend(Config.backend) except ImportError, e: printerr("Backend ''" + Config.backend + "'' doesn't exist. " + str(e)) sys.exit(2) printdbg("Bicho object created, options and backend initialized") backend.run() if Config.logtable: try: ilogger = IssueLogger.create_logger(Config.backend) except ImportError, e: printerr("Logger ''" + Config.backend + "'' doesn't exist. " + str(e)) sys.exit(2) printdbg("Bicho logger object created") ilogger.run() if __name__ == "__main__": main()
def _parse_line(self, line):
    """Feed one line of git log output into the parser.

    Recognizes commit headers (with decorations used to track
    branches/tags), committer/author lines, dates, file actions and
    message lines, forwarding parsed pieces to self.handler.
    """
    # Nothing to do on empty input
    if line is None or line == '':
        return

    # Ignore
    for patt in self.patterns['ignore']:
        if patt.match(line):
            return

    # Commit
    match = self.patterns['commit'].match(line)
    if match:
        if self.commit is not None:
            # Skip commits on svn tags
            if self.branch.tail.svn_tag is None:
                self.handler.commit(self.branch.tail.commit)

        self.commit = Commit()
        self.commit.revision = match.group(1)

        # group(3) holds the space-separated parent hashes (if any)
        parents = match.group(3)
        if parents:
            parents = parents.split()
        git_commit = self.GitCommit(self.commit, parents)

        # If a specific branch has been configured, there
        # won't be any decoration, so a branch needs to be
        # created
        if Config().branch is not None:
            self.branch = self.GitBranch(self.GitBranch.LOCAL,
                                         Config().branch, git_commit)

        # group(5) is the ref decoration, e.g. "(HEAD, origin/master)"
        decorate = match.group(5)
        branch = None
        if decorate:
            # Remote branch
            m = re.search(self.patterns['branch'], decorate)
            if m:
                branch = self.GitBranch(self.GitBranch.REMOTE, m.group(1),
                                        git_commit)
                # NOTE(review): "acommit" typo in this debug string;
                # left untouched here (runtime text).
                printdbg("Branch '%s' head at acommit %s",
                         (branch.name, self.commit.revision))
            else:
                # Local Branch
                m = re.search(self.patterns['local-branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.LOCAL, m.group(1),
                                            git_commit)
                    printdbg("Commit %s on local branch '%s'",
                             (self.commit.revision, branch.name))
                    # If local branch was merged we just ignore this
                    # decoration
                    if self.branch and \
                       self.branch.is_my_parent(git_commit):
                        printdbg("Local branch '%s' was merged",
                                 (branch.name,))
                        branch = None
                else:
                    # Stash
                    m = re.search(self.patterns['stash'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.STASH,
                                                "stash", git_commit)
                        printdbg("Commit %s on stash",
                                 (self.commit.revision,))
            # Tag
            m = re.search(self.patterns['tag'], decorate)
            if m:
                self.commit.tags = [m.group(1)]
                printdbg("Commit %s tagged as '%s'",
                         (self.commit.revision, self.commit.tags[0]))

        if branch is not None and self.branch is not None:
            # Detect empty branches. Ideally, the head of a branch
            # can't have children. When this happens is because the
            # branch is empty, so we just ignore such branch
            if self.branch.is_my_parent(git_commit):
                printout("Warning: Detected empty branch '%s', " + \
                         "it'll be ignored", (branch.name,))
                branch = None

        if len(self.branches) >= 2:
            # If current commit is the start point of a new branch
            # we have to look at all the current branches since
            # we haven't inserted the new branch yet.
            # If not, look at all other branches excluding the current one
            for i, b in enumerate(self.branches):
                if i == 0 and branch is None:
                    continue
                if b.is_my_parent(git_commit):
                    # We assume current branch is always the last one
                    # AFAIK there's no way to make sure this is right
                    printdbg("Start point of branch '%s' at commit %s",
                             (self.branches[0].name, self.commit.revision))
                    self.branches.pop(0)
                    self.branch = b

        if self.branch and self.branch.tail.svn_tag is not None and \
           self.branch.is_my_parent(git_commit):
            # There's a pending tag in previous commit
            pending_tag = self.branch.tail.svn_tag
            printdbg("Move pending tag '%s' from previous commit %s " + \
                     "to current %s", (pending_tag,
                                       self.branch.tail.commit.revision,
                                       self.commit.revision))
            if self.commit.tags and pending_tag not in self.commit.tags:
                self.commit.tags.append(pending_tag)
            else:
                self.commit.tags = [pending_tag]
            self.branch.tail.svn_tag = None

        if branch is not None:
            self.branch = branch

            # Insert master always at the end
            if branch.is_remote() and branch.name == 'master':
                self.branches.append(self.branch)
            else:
                self.branches.insert(0, self.branch)
        else:
            self.branch.set_tail(git_commit)

        if parents and len(parents) > 1 and not Config().analyze_merges:
            # Skip merge commits
            self.commit = None

        return
    elif self.commit is None:
        # No commit in progress: ignore everything until a header arrives
        return

    # Committer
    match = self.patterns['committer'].match(line)
    if match:
        self.commit.committer = Person()
        self.commit.committer.name = match.group(1)
        self.commit.committer.email = match.group(2)
        self.handler.committer(self.commit.committer)
        return

    # Author
    match = self.patterns['author'].match(line)
    if match:
        self.commit.author = Person()
        self.commit.author.name = match.group(1)
        self.commit.author.email = match.group(2)
        self.handler.author(self.commit.author)
        return

    # Commit Date
    match = self.patterns['commit-date'].match(line)
    if match:
        self.commit.commit_date = datetime.datetime(*(time.strptime(\
            match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
        return

    # Author Date
    match = self.patterns['author-date'].match(line)
    if match:
        self.commit.author_date = datetime.datetime(*(time.strptime(\
            match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
        return

    # File
    match = self.patterns['file'].match(line)
    if match:
        action = Action()
        action.type = match.group(1)
        action.f1 = match.group(2)
        self.commit.actions.append(action)
        self.handler.file(action.f1)
        return

    # File moved/copied
    match = self.patterns['file-moved'].match(line)
    if match:
        action = Action()
        # NOTE(review): "type" shadows the builtin; kept as-is here
        type = match.group(1)
        # 'R' (rename) is mapped to the internal 'V' action type
        if type == 'R':
            action.type = 'V'
        else:
            action.type = type
        action.f1 = match.group(3)
        action.f2 = match.group(2)
        action.rev = self.commit.revision
        self.commit.actions.append(action)
        self.handler.file(action.f1)
        return

    # Message: any unmatched line is part of the commit message
    self.commit.message += line + '\n'

    # NOTE(review): "assert True" never fires — this is a no-op,
    # presumably meant to flag unmatched lines; kept as-is.
    assert True, "Not match for line %s" % (line)
def commit(self, commit):
    """Store a parsed commit: log row, per-file actions and tags.

    Skips revisions already seen (revision_cache).  Each file action
    is translated into a DBAction linked to a file id resolved via
    the add/delete/rename/copy/replace helpers.  Accumulated rows are
    flushed to the database once MAX_ACTIONS is reached.
    """
    if commit.revision in self.revision_cache:
        return

    profiler_start("New commit %s for repository %d",
                   (commit.revision, self.repo_id))
    log = DBLog(None, commit)
    log.repository_id = self.repo_id
    self.revision_cache[commit.revision] = log.id

    log.committer = self.__get_person(commit.committer)

    # Reuse the committer's person id when author == committer
    if commit.author == commit.committer:
        log.author = log.committer
    elif commit.author is not None:
        log.author = self.__get_person(commit.author)

    self.commits.append(log)

    printdbg("DBContentHandler: commit: %d rev: %s", (log.id, log.rev))

    # TODO: sort actions? R, A, D, M, V, C
    for action in commit.actions:
        printdbg("DBContentHandler: Action: %s", (action.type,))
        dbaction = DBAction(None, action.type)
        dbaction.commit_id = log.id

        branch = commit.branch or action.branch_f1
        branch_id = self.__get_branch(branch)
        dbaction.branch_id = branch_id

        # Paths are namespaced by branch: "<branch_id>://<path>"
        prefix = "%d://" % (branch_id)
        path = prefix + action.f1

        if action.type == 'A':
            # A file has been added
            file_id = self.__action_add(path, prefix, log)
        elif action.type == 'M':
            # A file has been modified
            file_id = self.__get_file_for_path(path, log.id)[0]
        elif action.type == 'D':
            # A file has been deleted
            file_id = self.__action_delete(path, log)
        elif action.type == 'V':
            # A file has been renamed
            file_id = self.__action_rename(path, prefix, log, action, dbaction)
        elif action.type == 'C':
            # A file has been copied
            file_id = self.__action_copy(path, prefix, log, action, dbaction)
        elif action.type == 'R':
            # A file has been replaced
            file_id = self.__action_replace(path, prefix, log, action,
                                            dbaction)
            if file_id is None:
                continue
        else:
            # Fixed: was "assert <non-empty string>", which always
            # passes and let unknown types fall through to an
            # unbound file_id below.
            assert False, "Unknown action type %s" % (action.type)

        dbaction.file_id = file_id
        self.actions.append(dbaction)

    # Tags
    if commit.tags is not None:
        tag_revs = []
        for tag in commit.tags:
            tag_id = self.__get_tag(tag)
            db_tagrev = DBTagRev(None)
            tag_revs.append((db_tagrev.id, tag_id, log.id))

        self.cursor.executemany(statement(DBTagRev.__insert__,
                                          self.db.place_holder), tag_revs)

    # Flush accumulated rows in batches
    if len(self.actions) >= self.MAX_ACTIONS:
        printdbg("DBContentHandler: %d actions inserting",
                 (len(self.actions),))
        self.__insert_many()

    profiler_stop("New commit %s for repository %d",
                  (commit.revision, self.repo_id), True)
def _parse_line(self, line):
    """Feed one line of svn log output into the parser state machine.

    The parser moves between SVNParser.COMMIT, SVNParser.FILES and
    SVNParser.MESSAGE states.  A finished commit is flushed to
    self.handler when the closing separator line is seen.
    """
    # Blank line: transitions COMMIT/FILES -> MESSAGE, or keeps
    # accumulating empty message lines while in MESSAGE state.
    if not line:
        if self.commit is not None and self.state == SVNParser.COMMIT \
           or self.state == SVNParser.FILES:
            self.state = SVNParser.MESSAGE
        elif self.state == SVNParser.MESSAGE:
            self.__append_message_line()
        return

    # Message: while message lines remain, every line belongs to it
    if self.state == SVNParser.MESSAGE and self.msg_lines > 0:
        self.__append_message_line(line)
        return

    # Invalid commit. Some svn repos like asterisk have commits like this:
    # r176840 | (no author) | (no date) | 1 line
    # without any changed path, so I think we can just ignore them
    if self.patterns['invalid'].match(line):
        printdbg("SVN Parser: skipping invalid commit: %s", (line,))
        self.state = SVNParser.COMMIT
        self.commit = None
        return

    # Separator: closes the current commit (if any) and hands it over
    if self.patterns['separator'].match(line):
        if self.commit is None or self.state == SVNParser.COMMIT:
            return
        elif self.state == SVNParser.MESSAGE \
                or self.state == SVNParser.FILES:
            # We can go directly from FILES to COMMIT
            # when there is an empty log message
            if self.msg_lines > 0:
                printout("Warning (%d): parsing svn log, missing " + \
                         "lines in commit message!", (self.n_line,))

            self.__convert_commit_actions(self.commit)
            self.handler.commit(self.commit)
            self.state = SVNParser.COMMIT
            self.commit = None
            self.msg_lines = 0
        else:
            printout("Warning (%d): parsing svn log, unexpected separator",
                     (self.n_line,))
        return

    # Commit header: "rN | author | date | N lines"
    match = self.patterns['commit'].match(line)
    if match and self.state == SVNParser.COMMIT:
        commit = Commit()
        commit.revision = match.group(1)

        commit.committer = Person()
        commit.committer.name = match.group(2)

        # Groups 3..8 are year, month, day, hour, minute, second
        commit.commit_date = datetime.datetime(int(match.group(3)),
                                               int(match.group(4)),
                                               int(match.group(5)),
                                               int(match.group(6)),
                                               int(match.group(7)),
                                               int(match.group(8)))
        # group(10) is the declared number of message lines
        self.msg_lines = int(match.group(10))
        self.commit = commit
        self.handler.committer(commit.committer)
        return
    elif match and self.state == SVNParser.MESSAGE:
        # It seems a piece of a log message has been copied as
        # part of the commit message
        self.commit.message += line + '\n'
        return
    elif match and self.state != SVNParser.COMMIT:
        printout("Warning (%d): parsing svn log, unexpected line %s",
                 (self.n_line, line))
        return

    # Files: the "paths" marker switches us into FILES state
    if self.state == SVNParser.COMMIT:
        if self.patterns['paths'].match(line):
            self.state = SVNParser.FILES
        else:
            printout("Warning(%d): parsing svn log, unexpected line %s",
                     (self.n_line, line))
        return

    # File moved/copied/replaced (two-path action with a source revision)
    match = self.patterns['file-moved'].match(line)
    if match:
        if self.state != SVNParser.FILES:
            printout("Warning (%d): parsing svn log, unexpected line %s",
                     (self.n_line, line))
            return

        action = Action()
        action.type = match.group(1)
        action.f1 = match.group(2)
        action.f2 = match.group(3)
        action.rev = match.group(4)

        action.branch_f1 = self.__guess_branch_from_path(action.f1)
        action.branch_f2 = self.__guess_branch_from_path(action.f2)

        self.commit.actions.append(action)
        self.handler.file(action.f1)

        return

    # File (single-path action)
    match = self.patterns['file'].match(line)
    if match:
        if self.state != SVNParser.FILES:
            printout("Warning (%d): parsing svn log, unexpected line %s",
                     (self.n_line, line))
            return

        path = match.group(2)
        if path != '/':
            # path == '/' is probably a properties change in /
            # not interesting for us, ignoring
            action = Action()
            action.type = match.group(1)
            action.f1 = path

            action.branch_f1 = self.__guess_branch_from_path(path)

            self.commit.actions.append(action)
            self.handler.file(path)
        return
def __get_file_for_path(self, path, commit_id, old=True):
    """Return the (node_id, parent_id) pair for a repository path.

    Lookup order: file_cache first, then the moves cache, then
    (when old is True) the deletes cache.  When the path is not
    cached at all, every missing component is created and linked
    in the database.

    Args:
        path: branch-prefixed path ("<branch_id>://<path>")
        commit_id: id of the commit being processed
        old: when True the deletes cache is consulted; pass False
            only when the file is known to be new (an 'A' action)
    """
    def ensure_path(path, commit_id):
        # Walk the path component by component, reusing nodes found
        # in file_cache and creating the missing ones in the database.
        profiler_start("Ensuring path %s for repository %d",
                       (path, self.repo_id))
        printdbg("DBContentHandler: ensure_path %s", (path,))

        prefix, lpath = path.split("://", 1)
        prefix += "://"
        tokens = lpath.strip('/').split('/')

        parent = -1
        node_id = None
        for i, token in enumerate(tokens):
            file_path = '/'.join(tokens[:i + 1])
            rpath = prefix + '/' + file_path
            if not ":///" in path:
                # If the repo paths don't start with /
                # remove it here
                rpath = rpath.replace(':///', '://')
            printdbg("DBContentHandler: rpath: %s", (rpath,))
            try:
                node_id, parent_id = self.file_cache[rpath]
                parent = node_id
                continue
            except KeyError:
                # Was a bare "except:"; only a cache miss is expected
                # here, anything else should propagate.
                pass

            # Rpath not in cache, add it
            node_id = self.__add_new_file_and_link(token, parent,
                                                   commit_id, file_path)
            parent_id = parent
            parent = node_id
            self.file_cache[rpath] = (node_id, parent_id)

        assert node_id is not None

        printdbg("DBContentHandler: path ensured %s = %d (%d)",
                 (path, node_id, parent_id))
        profiler_stop("Ensuring path %s for repository %d",
                      (path, self.repo_id), True)

        return node_id, parent_id

    printdbg("DBContentHandler: Looking for path %s in cache", (path,))

    # First of all look at the cache
    try:
        return self.file_cache[path]
    except KeyError:
        pass

    # It's not in the cache, look now at the moves cache
    try:
        retval = self.__get_file_from_moves_cache(path)
        printdbg("DBContentHandler: Found %s in moves cache", (path,))
        self.file_cache[path] = retval
        return retval
    except FileNotInCache:
        pass

    # Due to branching, the file may be deleted in other branches,
    # and thus in deletes_cache. Unless in A action when we are
    # pretty sure that it is a new file, we should always look
    # at the deletes_cache for file_id
    if old and path in self.deletes_cache:
        return self.deletes_cache[path]

    # It hasn't been moved (or any of its parents)
    # so it was copied at some point
    return ensure_path(path, commit_id)