Example #1
 def __load_caches_from_disk (self):
     printdbg ("DBContentHandler: Loading caches from disk (%s)", (self.cache_file,))
     f = open (self.cache_file, 'r')
     (self.file_cache, self.moves_cache, self.deletes_cache,
      self.revision_cache, self.branch_cache, self.tags_cache,
      self.people_cache) = load (f)
     f.close ()
Example #2
def _get_uri_and_repo(path):
    """ Get a URI and repositoryhandler object for a path.

    This function returns a URI as a string, and the repositoryhandler
    object that represents that URI. They are returned together as a tuple.

    Args:
      path: The path to the repository
    """
    # Create repository
    if path is not None:
        try:
            printdbg("Creating repositoryhandler instance")
            repo = create_repository_from_path(path)
            repo.timeout = 120
        except RepositoryUnknownError:
            printerr("Path %s doesn't seem to point to a repository " + \
                     "supported by cvsanaly", (path,))
            sys.exit(1)
        except Exception, e:
            printerr("Unknown error creating repository for path %s (%s)",
                     (path, str(e)))
            sys.exit(1)
        uri = repo.get_uri_for_path(path)
        return (uri, repo)
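A minimal usage sketch, assuming _get_uri_and_repo and printdbg are importable from the cvsanaly modules shown above; the path below is purely illustrative:

    # Hypothetical call: repositoryhandler detects the repository type behind the path
    uri, repo = _get_uri_and_repo("/tmp/some-checkout")
    print(uri)            # the URI repositoryhandler derives for that working copy
    print(repo.timeout)   # 120, set by _get_uri_and_repo before returning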
Example #3
 def __load_caches_from_disk(self):
     printdbg("DBContentHandler: Loading caches from disk (%s)", (self.cache_file,))
     f = open(self.cache_file, 'r')
     (self.file_cache, self.moves_cache, self.deletes_cache,
      self.revision_cache, self.branch_cache, self.tags_cache,
      self.people_cache) = load(f)
     f.close()
Example #4
def _get_uri_and_repo(path):
    """ Get a URI and repositoryhandler object for a path.

    This function returns a URI as a string, and the repositoryhandler
    object that represents that URI. They are returned together as a tuple.

    Args:
      path: The path to the repository
    """
    # Create repository
    if path is not None:
        try:
            printdbg("Creating repositoryhandler instance")
            repo = create_repository_from_path(path)
            repo.timeout = 120
        except RepositoryUnknownError:
            printerr("Path %s doesn't seem to point to a repository " + \
                     "supported by cvsanaly", (path,))
            sys.exit(1)
        except Exception, e:
            printerr("Unknown error creating repository for path %s (%s)",
                     (path, str(e)))
            sys.exit(1)
        uri = repo.get_uri_for_path(path)
        return (uri, repo)
Example #5
        def ensure_person(person):
            profiler_start("Ensuring person %s for repository %d",
                            (person.name, self.repo_id))
            printdbg("DBContentHandler: ensure_person %s <%s>",
                      (person.name, person.email))
            cursor = self.cursor

            name = to_utf8(person.name)
            email = person.email

            if email is not None:
                email = to_utf8(email).decode("utf-8")

            cursor.execute(statement(
                "SELECT id from people where name = ?", self.db.place_holder),
                (to_utf8(name).decode("utf-8"),))
            rs = cursor.fetchone()
            if not rs:
                p = DBPerson(None, person)

                cursor.execute(statement(DBPerson.__insert__,
                                self.db.place_holder),
                                (p.id, to_utf8(p.name).decode("utf-8"),
                                 email))
                person_id = p.id
            else:
                person_id = rs[0]

            profiler_stop("Ensuring person %s for repository %d",
                           (person.name, self.repo_id), True)

            return person_id
Example #6
    def __reader(self, templog, queue):
        def commit_cb(item):
            queue.put(item)

        printdbg("DBProxyContentHandler: thread __reader started")
        templog.foreach(commit_cb, self.order)
        printdbg("DBProxyContentHandler: thread __reader finished")
Example #7
def _get_extensions_manager(extensions, hard_order=False):
    try:
        printdbg("Starting ExtensionsManager")
        emg = ExtensionsManager(extensions, hard_order=hard_order)
        return emg
    except InvalidExtension, e:
        printerr("Invalid extension %s", (e.name, ))
        sys.exit(1)
Example #8
 def __save_caches_to_disk(self):
     printdbg("DBContentHandler: Saving caches to disk (%s)", (self.cache_file,))
     cache = [self.file_cache, self.moves_cache, self.deletes_cache,
              self.revision_cache, self.branch_cache, self.tags_cache,
              self.people_cache]
     f = open(self.cache_file, 'w')
     dump(cache, f, -1)
     f.close()
Example #9
 def __save_caches_to_disk (self):
     printdbg ("DBContentHandler: Saving caches to disk (%s)", (self.cache_file,))
     cache = [self.file_cache, self.moves_cache, self.deletes_cache,
              self.revision_cache, self.branch_cache, self.tags_cache,
              self.people_cache]
     f = open (self.cache_file, 'w')
     dump (cache, f, -1)
     f.close ()
Example #10
def _get_extensions_manager(extensions, hard_order=False):
    try:
        printdbg("Starting ExtensionsManager")
        emg = ExtensionsManager(extensions,
                                hard_order=hard_order)
        return emg
    except InvalidExtension, e:
        printerr("Invalid extension %s", (e.name,))
        sys.exit(1)
Example #11
    def __execute (self):
        q = "%s LIMIT %d OFFSET %d" % (self.query, self.interval_size, self.i)
        self.i += self.interval_size

        printdbg (q)
        if self.args:
            self.cursor.execute (q, self.args)
        else:
            self.cursor.execute (q)

        self.need_exec = False
Example #12
    def __execute(self):
        q = "%s LIMIT %d OFFSET %d" % (self.query, self.interval_size, self.i)
        self.i += self.interval_size

        printdbg(q)
        if self.args:
            self.cursor.execute(q, self.args)
        else:
            self.cursor.execute(q)

        self.need_exec = False
Example #13
def statement(str, ph_mark):
    if "?" == ph_mark or "?" not in str:
        printdbg(str)
        return str

    tokens = str.split("'")
    for i in range(0, len(tokens), 2):
        tokens[i] = tokens[i].replace("?", ph_mark)

    retval = "'".join(tokens)
    printdbg(retval)

    return retval
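A short illustration of how statement() rewrites place holders, assuming a backend whose place holder mark is "%s" (when the mark is already "?", the query is returned untouched):

    statement("SELECT id from tags where name = ?", "?")
    # returned unchanged: the place holder mark is already "?"
    statement("SELECT id from tags where name = ? or name = '?'", "%s")
    # -> "SELECT id from tags where name = %s or name = '?'"
    # only the "?" outside the single-quoted literal is replaced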
Example #14
def statement(str, ph_mark):
    if "?" == ph_mark or "?" not in str:
        printdbg(str)
        return str

    tokens = str.split("'")
    for i in range(0, len(tokens), 2):
        tokens[i] = tokens[i].replace("?", ph_mark)

    retval = "'".join(tokens)
    printdbg(retval)
    
    return retval
Example #15
    def end(self):
        # flush pending inserts
        printdbg("DBContentHandler: flushing pending inserts")
        self.__insert_many()

        # Save the caches to disk
        profiler_start("Saving caches to disk")
        self.__save_caches_to_disk()
        profiler_stop("Saving caches to disk", delete=True)

        self.cursor.close()
        self.cnn.close()
        self.cnn = None
Example #16
        def ensure_branch(branch):
            profiler_start("Ensuring branch %s for repository %d", (branch, self.repo_id))
            printdbg("DBContentHandler: ensure_branch %s", (branch,))
            cursor = self.cursor

            cursor.execute(statement("SELECT id from branches where name = ?", self.db.place_holder), (branch,))
            rs = cursor.fetchone()
            if not rs:
                b = DBBranch(None, branch)
                cursor.execute(statement(DBBranch.__insert__, self.db.place_holder), (b.id, b.name))
                branch_id = b.id
            else:
                branch_id = rs[0]

            profiler_stop("Ensuring branch %s for repository %d", (branch, self.repo_id), True)

            return branch_id
Example #17
        def ensure_tag(tag):
            profiler_start("Ensuring tag %s for repository %d", (tag, self.repo_id))
            printdbg("DBContentHandler: ensure_tag %s", (tag,))
            cursor = self.cursor

            cursor.execute(statement("SELECT id from tags where name = ?", self.db.place_holder), (tag,))
            rs = cursor.fetchone()
            if not rs:
                t = DBTag(None, tag)
                cursor.execute(statement(DBTag.__insert__, self.db.place_holder), (t.id, t.name))
                tag_id = t.id
            else:
                tag_id = rs[0]

            profiler_stop("Ensuring tag %s for repository %d", (tag, self.repo_id), True)

            return tag_id
Example #18
        def ensure_tag (tag):
            profiler_start ("Ensuring tag %s for repository %d",
                            (tag, self.repo_id))
            printdbg ("DBContentHandler: ensure_tag %s", (tag,))
            cursor = self.cursor

            cursor.execute (statement ("SELECT id from tags where name = ?",
                            self.db.place_holder), (tag,))
            rs = cursor.fetchone ()
            if not rs:
                t = DBTag (None, tag)
                cursor.execute (statement (DBTag.__insert__,
                                self.db.place_holder), (t.id, t.name))
                tag_id = t.id
            else:
                tag_id = rs[0]

            profiler_stop ("Ensuring tag %s for repository %d", (tag, self.repo_id), True)

            return tag_id
Example #19
    def __get_file_from_moves_cache(self, path):
        # The path is not in the cache, but it should be.
        # Check whether any of its parents was moved.
        printdbg("DBContentHandler: looking for path %s in moves cache", (path,))
        current_path = path
        replaces = []
        while current_path not in self.file_cache:
            found = False
            for new_path in self.moves_cache.keys():
                if not current_path.startswith(new_path) or new_path in replaces:
                    continue

                current_path = current_path.replace(new_path, self.moves_cache[new_path], 1)
                replaces.append(new_path)
                found = True

            if not found:
                raise FileNotInCache

        return self.file_cache[current_path]
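A hedged walk-through of the loop above with invented cache contents (the real caches hold full repository URIs; these short paths are only illustrative):

    # Suppose self.moves_cache == {"trunk/new": "trunk/old"}
    # and     self.file_cache  == {"trunk/old/foo.c": (7, 3)}.
    # Looking up "trunk/new/foo.c": it is not in file_cache, but it starts with the
    # moves_cache key "trunk/new", so it is rewritten to "trunk/old/foo.c", which is
    # in file_cache, and (7, 3) is returned. If no key ever matches, FileNotInCache
    # is raised.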
Example #20
    def __get_file_from_moves_cache (self, path):
        # The path is not in the cache, but it should be.
        # Check whether any of its parents was moved.
        printdbg ("DBContentHandler: looking for path %s in moves cache", (path,))
        current_path = path
        replaces = []
        while current_path not in self.file_cache:
            found = False
            for new_path in self.moves_cache.keys ():
                if not current_path.startswith (new_path) or new_path in replaces:
                    continue

                current_path = current_path.replace (new_path, self.moves_cache[new_path], 1)
                replaces.append (new_path)
                found = True
            
            if not found:
                raise FileNotInCache

        return self.file_cache[current_path]
Example #21
    def __convert_commit_actions(self, commit):
        # We detect here files that have been moved or
        # copied. Files moved are converted into a
        # single action of type 'V'. For copied files
        # we just change its actions type from 'A' to 'C'

        def find_action(actions, type, path):
            for action in actions:
                if action.type == type and action.f1 == path:
                    return action
            
            return None

        remove_actions = []
        
        for action in commit.actions:
            if action.f2 is not None:
                # Move or copy action
                if action.type == 'A':
                    del_action = find_action(commit.actions, 'D', action.f2)
                    if del_action is not None and del_action \
                    not in remove_actions:
                        # FIXME: See http://goo.gl/eymoH
                        printdbg("SVN Parser: File %s has been renamed to %s", 
                                 (action.f2, action.f1))
                        action.type = 'V'
                        remove_actions.append(del_action)
                    else:
                        action.type = 'C'
                        printdbg("SVN Parser: File %s has been copied to %s", 
                                 (action.f2, action.f1))

                        # Try to guess if it was a tag
                        # Yes, with svn we are always guessing :-/
                        tag = self.__guess_tag_from_path(action.f1)
                        if tag is not None:
                            if commit.tags is None:
                                commit.tags = []

                            commit.tags.append(tag)
                            
                elif action.type == 'R':
                    # TODO
                    printdbg("SVN Parser: File %s replaced to %s", 
                             (action.f2, action.f1))
                    pass

        for action in remove_actions:
            printdbg("SVN Parser: Removing action %s %s", 
                     (action.type, action.f1))
            commit.actions.remove(action)
Example #22
    def __convert_commit_actions(self, commit):
        # We detect here files that have been moved or
        # copied. Files moved are converted into a
        # single action of type 'V'. For copied files
        # we just change its actions type from 'A' to 'C'

        def find_action(actions, type, path):
            for action in actions:
                if action.type == type and action.f1 == path:
                    return action

            return None

        remove_actions = []

        for action in commit.actions:
            if action.f2 is not None:
                # Move or copy action
                if action.type == 'A':
                    del_action = find_action(commit.actions, 'D', action.f2)
                    if del_action is not None and del_action \
                    not in remove_actions:
                        # FIXME: See http://goo.gl/eymoH
                        printdbg("SVN Parser: File %s has been renamed to %s",
                                 (action.f2, action.f1))
                        action.type = 'V'
                        remove_actions.append(del_action)
                    else:
                        action.type = 'C'
                        printdbg("SVN Parser: File %s has been copied to %s",
                                 (action.f2, action.f1))

                        # Try to guess if it was a tag
                        # Yes, with svn we are always guessing :-/
                        tag = self.__guess_tag_from_path(action.f1)
                        if tag is not None:
                            if commit.tags is None:
                                commit.tags = []

                            commit.tags.append(tag)

                elif action.type == 'R':
                    # TODO
                    printdbg("SVN Parser: File %s replaced to %s",
                             (action.f2, action.f1))
                    pass

        for action in remove_actions:
            printdbg("SVN Parser: Removing action %s %s",
                     (action.type, action.f1))
            commit.actions.remove(action)
Example #23
 def do_delete(self, delete_statement, params=None,
               error_message="Delete failed, data needs manual cleanup"):
     if self.repo_id is None:
         # Repo wasn't found anyway, so continue
         return True
     
     # You can't reference instance variables in default
     # parameters, so I have to do this.
     if params is None:
         params = (self.repo_id,)
     
     try:
         delete_cursor = self.connection.cursor()
         execute_statement(statement(delete_statement, 
                                     self.db.place_holder),
                           params, delete_cursor,
                           self.db, error_message)
     except Exception:
         printdbg("Deletion exception")
     finally:
         delete_cursor.close()
Example #24
        def ensure_branch(branch):
            profiler_start("Ensuring branch %s for repository %d",
                            (branch, self.repo_id))
            printdbg("DBContentHandler: ensure_branch %s", (branch,))
            cursor = self.cursor

            cursor.execute(statement("SELECT id from branches where name = ?",
                            self.db.place_holder), (branch,))
            rs = cursor.fetchone()
            if not rs:
                b = DBBranch(None, branch)
                cursor.execute(statement(DBBranch.__insert__,
                                self.db.place_holder), (b.id, b.name))
                branch_id = b.id
            else:
                branch_id = rs[0]

            profiler_stop("Ensuring branch %s for repository %d",
                          (branch, self.repo_id), True)

            return branch_id
Example #25
        def ensure_path(path, commit_id):
            profiler_start("Ensuring path %s for repository %d",
                           (path, self.repo_id))
            printdbg("DBContentHandler: ensure_path %s", (path,))

            prefix, lpath = path.split("://", 1)
            prefix += "://"
            tokens = lpath.strip('/').split('/')

            parent = -1
            node_id = None
            for i, token in enumerate(tokens):
                rpath = prefix + '/' + '/'.join(tokens[:i + 1])
                if not ":///" in path:
                    # If the repo paths don't start with /
                    # remove it here
                    rpath = rpath.replace(':///', '://')
                printdbg("DBContentHandler: rpath: %s", (rpath,))
                try:
                    node_id, parent_id = self.file_cache[rpath]
                    parent = node_id
                    continue
                except:
                    pass

                # Rpath not in cache, add it
                node_id = self.__add_new_file_and_link(token, parent,
                                                       commit_id)
                parent_id = parent
                parent = node_id

                # Also add to file_paths
                self.__add_file_path(commit_id, node_id,
                    re.sub('^\d+://', '', rpath))

                self.file_cache[rpath] = (node_id, parent_id)

            assert node_id is not None

            printdbg("DBContentHandler: path ensured %s = %d (%d)",
                     (path, node_id, parent_id))
            profiler_stop("Ensuring path %s for repository %d",
                          (path, self.repo_id), True)

            return node_id, parent_id
Example #26
    def end(self):
        # The log is now in the temp table
        # Retrieve the data now and pass it to
        # the real content handler

        self.templog.flush()
        printdbg("DBProxyContentHandler: parsing finished, creating thread")

        self.db_handler.begin()
        self.db_handler.repository(self.repo_uri)

        queue = AsyncQueue(50)
        reader_thread = threading.Thread(target=self.__reader,
                                         args=(self.templog, queue))
        reader_thread.setDaemon(True)
        reader_thread.start()

        # Use the queue with mutexes while the
        # thread is alive
        while reader_thread.isAlive():
            try:
                item = queue.get(1)
            except TimeOut:
                continue
            printdbg("DBProxyContentHandler: commit: %s", (item.revision, ))
            self.db_handler.commit(item)
            del item

        # No threads now, we don't need locks
        printdbg(
            "DBProxyContentHandler: thread __reader is finished, continue without locks"
        )
        while not queue.empty_unlocked():
            item = queue.get_unlocked()
            self.db_handler.commit(item)
            del item

        self.db_handler.end()
        self.templog.clear()
Example #27
    def end(self):
        # The log is now in the temp table
        # Retrieve the data now and pass it to
        # the real content handler

        self.templog.flush()
        printdbg("DBProxyContentHandler: parsing finished, creating thread")

        self.db_handler.begin()
        self.db_handler.repository(self.repo_uri)

        queue = AsyncQueue(50)
        reader_thread = threading.Thread(target=self.__reader,
                                          args=(self.templog, queue))
        reader_thread.setDaemon(True)
        reader_thread.start()

        # Use the queue with mutexes while the
        # thread is alive
        while reader_thread.isAlive():
            try:
                item = queue.get(1)
            except TimeOut:
                continue
            printdbg("DBProxyContentHandler: commit: %s", (item.revision,))
            self.db_handler.commit(item)
            del item

        # No threads now, we don't need locks
        printdbg("DBProxyContentHandler: thread __reader is finished, " + \
                 "continue without locks")
        while not queue.empty_unlocked():
            item = queue.get_unlocked()
            self.db_handler.commit(item)
            del item

        self.db_handler.end()
        self.templog.clear()
Example #28
    def create_tables(self, cursor):
        import sqlite3.dbapi2

        try:
            cursor.execute("""CREATE TABLE repositories (
                            id integer primary key,
                            uri varchar,
                            name varchar,
                            type varchar 
                            )""")
            cursor.execute("""CREATE TABLE people (
                            id integer primary key,
                            name varchar,
                            email varchar
                            )""")
            cursor.execute("""CREATE TABLE scmlog (
                            id integer primary key,
                            rev varchar,
                            committer_id integer,
                            author_id integer,
                            date datetime,
                            message varchar,
                            composed_rev bool, 
                            repository_id integer
                            )""")
            cursor.execute("""CREATE TABLE actions (
                            id integer primary key,
                            type varchar(1),
                            file_id integer,
                            commit_id integer,
                            branch_id integer
                            )""")
            cursor.execute("""CREATE TABLE file_copies (
                            id integer primary key,
                            to_id integer,
                            from_id integer,
                            from_commit_id integer,
                            new_file_name varchar,
                            action_id integer
                            )""")
            cursor.execute("""CREATE TABLE branches (
                            id integer primary key,
                            name varchar
                            )""")
            cursor.execute("""CREATE TABLE files (
                            id integer primary key,
                            file_name varchar(255),
                            repository_id integer
                            )""")
            cursor.execute("""CREATE TABLE file_links (
                            id integer primary key,
                            parent_id integer,
                            file_id integer,
                            commit_id integer
                            )""")
            cursor.execute("""CREATE TABLE file_paths (
                            id integer primary key,
                            commit_id integer,
                            file_id integer,
                            file_path varchar(255)
                            )""")
            cursor.execute("""CREATE TABLE tags (
                            id integer primary key,
                            name varchar
                            )""")
            cursor.execute("""CREATE TABLE tag_revisions (
                            id integer primary key,
                            tag_id integer,
                            commit_id integer
                            )""")
            cursor.execute("CREATE index files_file_name on files(file_name)")
            cursor.execute("CREATE index scmlog_date on scmlog(date)")
            cursor.execute("CREATE index scmlog_repo on scmlog(repository_id)")
            self._create_views(cursor)
        except sqlite3.dbapi2.OperationalError as e:
            printdbg("Exception creating SQLite tables: " + str(e))
            raise TableAlreadyExists
        except:
            raise
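A sketch of querying the schema created above, assuming scmlog.author_id references people.id (an inference from the column names); the database file name is illustrative:

    import sqlite3.dbapi2

    cnn = sqlite3.dbapi2.connect("cvsanaly.db")
    cursor = cnn.cursor()
    # Count commits per author name across the whole log
    cursor.execute("""SELECT p.name, COUNT(*)
                      FROM scmlog s, people p
                      WHERE s.author_id = p.id
                      GROUP BY p.name""")
    for name, ncommits in cursor.fetchall():
        print("%s %d" % (name, ncommits))
    cursor.close()
    cnn.close()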
Example #29
 def __init__ (self):
     options = Config()
     self.backend = create_backend (options.type)
     printdbg ("Bicho object created, options and backend initialized")
Example #30
    def _parse_line (self, line):
        if line is None or line == '':
            return

        # Ignore
        for patt in self.patterns['ignore']:
            if patt.match (line):
                return

        # Commit
        match = self.patterns['commit'].match (line)
        if match:
            if self.commit is not None and self.branch.is_remote ():
                if self.branch.tail.svn_tag is None: # Skip commits on svn tags
                    self.handler.commit (self.branch.tail.commit)

            self.commit = Commit ()
            self.commit.revision = match.group (1)

            parents = match.group (3)
            if parents:
                parents = parents.split ()
            git_commit = self.GitCommit (self.commit, parents)

            decorate = match.group (5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search (self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch (self.GitBranch.REMOTE, m.group (1), git_commit)
                    printdbg ("Branch '%s' head at acommit %s", (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search (self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch (self.GitBranch.LOCAL, m.group (1), git_commit)
                        printdbg ("Commit %s on local branch '%s'", (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this decoration
                        if self.branch and self.branch.is_my_parent (git_commit):
                            printdbg ("Local branch '%s' was merged", (branch.name,))
                            branch = None
                    else:
                        # Stash
                        m = re.search (self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch (self.GitBranch.STASH, "stash", git_commit)
                            printdbg ("Commit %s on stash", (self.commit.revision,))
                # Tag
                m = re.search (self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group (1)]
                    printdbg ("Commit %s tagged as '%s'", (self.commit.revision, self.commit.tags[0]))

            if branch is not None and self.branch is not None:
                # Detect empty branches. Ideally, the head of a branch
                # can't have children. When this happens it is because the
                # branch is empty, so we just ignore that branch
                if self.branch.is_my_parent (git_commit):
                    printout ("Warning: Detected empty branch '%s', it'll be ignored", (branch.name,))
                    branch = None

            if len (self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate (self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent (git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg ("Start point of branch '%s' at commit %s", (self.branches[0].name, self.commit.revision))
                        self.branches.pop (0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and self.branch.is_my_parent (git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg ("Move pending tag '%s' from previous commit %s to current %s", (pending_tag,
                                                                                          self.branch.tail.commit.revision,
                                                                                          self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append (pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.is_remote () and branch.name == 'master':
                    self.branches.append (self.branch)
                else:
                    self.branches.insert (0, self.branch)
            else:
                self.branch.set_tail (git_commit)

            return

        # Committer
        match = self.patterns['committer'].match (line)
        if match:
            self.commit.committer = Person ()
            self.commit.committer.name = match.group (1)
            self.commit.committer.email = match.group (2)
            self.handler.committer (self.commit.committer)

            return

        # Author
        match = self.patterns['author'].match (line)
        if match:
            self.commit.author = Person ()
            self.commit.author.name = match.group (1)
            self.commit.author.email = match.group (2)
            self.handler.author (self.commit.author)

            return

        # Date
        match = self.patterns['date'].match (line)
        if match:
            self.commit.date = datetime.datetime (* (time.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")
            
            return

        # File
        match = self.patterns['file'].match (line)
        if match:
            action = Action ()
            action.type = match.group (1)
            action.f1 = match.group (2)

            self.commit.actions.append (action)
            self.handler.file (action.f1)
        
            return

        # File moved/copied
        match = self.patterns['file-moved'].match (line)
        if match:
            action = Action ()
            type = match.group (1)
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group (3)
            action.f2 = match.group (2)
            action.rev = self.commit.revision

            self.commit.actions.append (action)
            self.handler.file (action.f1)

            return

        # This is a workaround for a bug in the GNOME Git migration
        # There are commits on tags not correctly detected like this one:
        # http://git.gnome.org/cgit/evolution/commit/?id=b8e52acac2b9fc5414a7795a73c74f7ee4eeb71f
        # We want to ignore commits on tags since they don't make any sense in Git
        if self.is_gnome:
            match = self.patterns['svn-tag'].match (line.strip ())
            if match:
                printout ("Warning: detected a commit on a svn tag: %s", (match.group (0),))
                tag = match.group (1)
                if self.commit.tags and tag in self.commit.tags:
                    # The commit will be ignored, so move the tag
                    # to the next (previous in history) commit
                    self.branch.tail.svn_tag = tag

        # Message
        self.commit.message += line + '\n'

        assert True, "Not match for line %s" % (line)
Example #31
    def _parse_line(self, line):
        if line is None or line == '':
            return

        # Ignore
        for patt in self.patterns['ignore']:
            if patt.match(line):
                return

        # Commit
        match = self.patterns['commit'].match(line)
        if match:
            if self.commit is not None:
                # Skip commits on svn tags
                if self.branch.tail.svn_tag is None:
                    self.handler.commit(self.branch.tail.commit)

            if self.patterns['replace-commit'].search(line):
                printdbg("Skipping commit, because it's a replacement")
                self.commit = None

                return

            self.commit = Commit()
            self.commit.revision = match.group(1)

            parents = match.group(3)
            if parents:
                parents = parents.split()
            git_commit = self.GitCommit(self.commit, parents)

            # If a specific branch has been configured, there
            # won't be any decoration, so a branch needs to be
            # created
            if Config().branch is not None:
                self.branch = self.GitBranch(self.GitBranch.LOCAL,
                                             Config().branch, git_commit)

            decorate = match.group(5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search(self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.REMOTE, m.group(2),
                                            git_commit)
                    printdbg("Branch '%s' head at acommit %s",
                             (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search(self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.LOCAL,
                                                m.group(1), git_commit)
                        printdbg("Commit %s on local branch '%s'",
                                 (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this
                        # decoration
                        if self.branch and \
                        self.branch.is_my_parent(git_commit):
                            printdbg("Local branch '%s' was merged",
                                     (branch.name, ))
                            branch = None
                    else:
                        # Stash
                        m = re.search(self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch(self.GitBranch.STASH,
                                                    "stash", git_commit)
                            printdbg("Commit %s on stash",
                                     (self.commit.revision, ))
                # Tag
                m = re.search(self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group(1)]
                    printdbg("Commit %s tagged as '%s'",
                             (self.commit.revision, self.commit.tags[0]))

            if branch is not None and self.branch is not None:
                # Detect empty branches. Ideally, the head of a branch
                # can't have children. When this happens it is because the
                # branch is empty, so we just ignore that branch
                if self.branch.is_my_parent(git_commit):
                    printout("Warning: Detected empty branch '%s', " + \
                             "it'll be ignored", (branch.name,))
                    branch = None

            if len(self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate(self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent(git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg("Start point of branch '%s' at commit %s",
                                 (self.branches[0].name, self.commit.revision))
                        self.branches.pop(0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and \
            self.branch.is_my_parent(git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg("Move pending tag '%s' from previous commit %s " + \
                         "to current %s", (pending_tag,
                                           self.branch.tail.commit.revision,
                                           self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append(pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.is_remote() and branch.name == 'master':
                    self.branches.append(self.branch)
                else:
                    self.branches.insert(0, self.branch)
            else:
                self.branch.set_tail(git_commit)

            if parents and len(parents) > 1 and not Config().analyze_merges:
                # Skip merge commits
                self.commit = None

            return
        elif self.commit is None:
            return

        # Committer
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)

            return

        # Author
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)

            return

        # Commit Date
        match = self.patterns['commit-date'].match(line)
        if match:
            self.commit.commit_date = datetime.datetime(*(time.strptime(\
                match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))

            return

        # Author Date
        match = self.patterns['author-date'].match(line)
        if match:
            self.commit.author_date = datetime.datetime(*(time.strptime(\
                match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))

            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            action = Action()
            action.type = match.group(1)
            action.f1 = match.group(2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # File moved/copied
        match = self.patterns['file-moved'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group(3)
            action.f2 = match.group(2)
            action.rev = self.commit.revision

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # Message
        self.commit.message += line + '\n'

        assert True, "Not match for line %s" % (line)
Example #32
    def create_tables(self, cursor):
        import sqlite3.dbapi2

        try:
            cursor.execute("""CREATE TABLE repositories (
                            id integer primary key,
                            uri varchar,
                            name varchar,
                            type varchar 
                            )""")
            cursor.execute("""CREATE TABLE people (
                            id integer primary key,
                            name varchar,
                            email varchar
                            )""")
            cursor.execute("""CREATE TABLE scmlog (
                            id integer primary key,
                            rev varchar,
                            committer_id integer,
                            author_id integer,
                            date datetime,
                            message varchar,
                            composed_rev bool, 
                            repository_id integer
                            )""")
            cursor.execute("""CREATE TABLE actions (
                            id integer primary key,
                            type varchar(1),
                            file_id integer,
                            commit_id integer,
                            branch_id integer
                            )""")
            cursor.execute("""CREATE TABLE file_copies (
                            id integer primary key,
                            to_id integer,
                            from_id integer,
                            from_commit_id integer,
                            new_file_name varchar,
                            action_id integer
                            )""")
            cursor.execute("""CREATE TABLE branches (
                            id integer primary key,
                            name varchar
                            )""")
            cursor.execute("""CREATE TABLE files (
                            id integer primary key,
                            file_name varchar(255),
                            repository_id integer
                            )""")
            cursor.execute("""CREATE TABLE file_links (
                            id integer primary key,
                            parent_id integer,
                            file_id integer,
                            commit_id integer
                            )""")
            cursor.execute("""CREATE TABLE file_paths (
                            id integer primary key,
                            commit_id integer,
                            file_id integer,
                            file_path varchar(255)
                            )""")
            cursor.execute("""CREATE TABLE tags (
                            id integer primary key,
                            name varchar
                            )""")
            cursor.execute("""CREATE TABLE tag_revisions (
                            id integer primary key,
                            tag_id integer,
                            commit_id integer
                            )""")
            cursor.execute("CREATE index files_file_name on files(file_name)")
            cursor.execute("CREATE index scmlog_date on scmlog(date)")
            cursor.execute("CREATE index scmlog_repo on scmlog(repository_id)")
            self._create_views(cursor)
        except sqlite3.dbapi2.OperationalError as e:
            printdbg("Exception creating SQLite tables: " + str(e))
            raise TableAlreadyExists
        except:
            raise
Example #33
    def _parse_line(self, line):
        if line is None or line == '':
            return

        # Ignore
        for patt in self.patterns['ignore']:
            if patt.match(line):
                return

        # Commit
        match = self.patterns['commit'].match(line)
        if match:
            if self.commit is not None and self.branch is not None:
                if self.branch.tail.svn_tag is None:  # Skip commits on svn tags
                    self.handler.commit(self.branch.tail.commit)

            self.commit = Commit()
            self.commit.revision = match.group(1)

            parents = match.group(3)
            if parents:
                parents = parents.split()
                self.commit.parents = parents
            git_commit = self.GitCommit(self.commit, parents)

            decorate = match.group(5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search(self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.REMOTE, m.group(1), git_commit)
                    printdbg("Branch '%s' head at acommit %s", (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search(self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.LOCAL, m.group(1), git_commit)
                        printdbg("Commit %s on local branch '%s'", (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this decoration
                        if self.branch and self.branch.is_my_parent(git_commit):
                            printdbg("Local branch '%s' was merged", (branch.name,))
                            branch = None
                    else:
                        # Stash
                        m = re.search(self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch(self.GitBranch.STASH, "stash", git_commit)
                            printdbg("Commit %s on stash", (self.commit.revision,))
                # Tag
                m = re.search(self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group(1)]
                    printdbg("Commit %s tagged as '%s'", (self.commit.revision, self.commit.tags[0]))

            if not branch and not self.branch:
                branch = self.GitBranch(self.GitBranch.LOCAL, "(no-branch)", git_commit)
                printdbg("Commit %s on unknown local branch '%s'", (self.commit.revision, branch.name))

            # This part of the code looks weird at first, so here is a short description of what it does:
            #
            # * self.branch is the branch to which the last inspected commit belonged
            # * branch is the branch of the currently parsed commit
            #
            # This check only finds branches which are fully merged into an already analyzed branch
            #
            # For more detailed information see https://github.com/MetricsGrimoire/CVSAnalY/issues/64
            if branch is not None and self.branch is not None:
                # Detect empty branches.
                # Ideally, the head of a branch can't have children.
                # When this happens it is because the branch is empty, so we just ignore that branch.
                if self.branch.is_my_parent(git_commit):
                    printout(
                        "Info: Branch '%s' will be ignored, because it was already merged in an active one.",
                        (branch.name,)
                    )
                    branch = None

            if len(self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate(self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent(git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg("Start point of branch '%s' at commit %s",
                                 (self.branches[0].name, self.commit.revision))
                        self.branches.pop(0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and self.branch.is_my_parent(git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg("Move pending tag '%s' from previous commit %s to current %s", (pending_tag,
                                                                                         self.branch.tail.commit.revision,
                                                                                         self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append(pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.name == 'master':
                    self.branches.append(self.branch)
                else:
                    self.branches.insert(0, self.branch)
            else:
                if self.branch is not None:
                    self.branch.set_tail(git_commit)
            return

        # Committer
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)
            return

        # Author
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)
            return

        # Commit date
        match = self.patterns['date'].match(line)
        if match:
            self.commit.date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
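            # For example, "-0300" parses to -300, and (-300 * 60 * 60) / 100 = -10800 seconds (UTC-3).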
            self.commit.date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # Author date
        match = self.patterns['author_date'].match(line)
        if match:
            self.commit.author_date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.author_date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            self.commit.author_date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if len(type) > 1:
                # merge actions
                if 'M' in type:
                    type = 'M'
                else:
                    # ignore merge actions without 'M'
                    return

            action.type = type
            action.f1 = match.group(2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)
            return

        # File moved/copied
        match = self.patterns['file-moved'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group(3)
            action.f2 = match.group(2)
            action.rev = self.commit.revision

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # This is a workaround for a bug in the GNOME Git migration
        # There are commits on tags not correctly detected like this one:
        # http://git.gnome.org/cgit/evolution/commit/?id=b8e52acac2b9fc5414a7795a73c74f7ee4eeb71f
        # We want to ignore commits on tags since they don't make any sense in Git
        if self.is_gnome:
            match = self.patterns['svn-tag'].match(line.strip())
            if match:
                printout("Warning: detected a commit on a svn tag: %s", (match.group(0),))
                tag = match.group(1)
                if self.commit.tags and tag in self.commit.tags:
                    # The commit will be ignored, so move the tag
                    # to the next (previous in history) commit
                    self.branch.tail.svn_tag = tag

        # Message
        self.commit.message += line + '\n'

        assert True, "Not match for line %s" % (line)
Example #34
File: main.py  Project: iKuba/Bicho
    # Note: Default values for options are defined in the
    # configuration module
    usage = 'Usage: %prog [options]'

    try:
        Config.set_config_options(usage)
    except (ErrorLoadingConfig, InvalidConfig), e:
        printerr(str(e))
        sys.exit(2)

    try:
        backend = Backend.create_backend(Config.backend)
    except ImportError, e:
        printerr("Backend ''" + Config.backend + "'' not exists. " + str(e))
        sys.exit(2)
    printdbg("Bicho object created, options and backend initialized")
    backend.run()

    if Config.logtable:
        try:
            ilogger = IssueLogger.create_logger(Config.backend)
        except ImportError, e:
            printerr("Logger ''" + Config.backend + "'' doesn't exist. " +
                     str(e))
            sys.exit(2)
        printdbg("Bicho logger object created")
        ilogger.run()


if __name__ == "__main__":
    main()
Example #35
    def __get_file_for_path(self, path, commit_id, old=False):
        """Get a pair of (node_id, parent_id) regarding a path.
           First, it looks at file_cache, the at the moves cache,
           then at the deleted cache and finally, when it is not
           found in the cache, it is added and linked in the
           database.
        """
        def ensure_path(path, commit_id):
            profiler_start("Ensuring path %s for repository %d",
                           (path, self.repo_id))
            printdbg("DBContentHandler: ensure_path %s", (path,))

            prefix, lpath = path.split("://", 1)
            prefix += "://"
            tokens = lpath.strip('/').split('/')

            parent = -1
            node_id = None
            for i, token in enumerate(tokens):
                rpath = prefix + '/' + '/'.join(tokens[:i + 1])
                if not ":///" in path:
                    # If the repo paths don't start with /
                    # remove it here
                    rpath = rpath.replace(':///', '://')
                printdbg("DBContentHandler: rpath: %s", (rpath,))
                try:
                    node_id, parent_id = self.file_cache[rpath]
                    parent = node_id
                    continue
                except KeyError:
                    pass

                # Rpath not in cache, add it
                node_id = self.__add_new_file_and_link(token, parent,
                                                       commit_id)
                parent_id = parent
                parent = node_id

                # Also add to file_paths
                self.__add_file_path(commit_id, node_id,
                    re.sub(r'^\d+://', '', rpath))

                self.file_cache[rpath] = (node_id, parent_id)

            assert node_id is not None

            printdbg("DBContentHandler: path ensured %s = %d (%d)",
                     (path, node_id, parent_id))
            profiler_stop("Ensuring path %s for repository %d",
                          (path, self.repo_id), True)

            return node_id, parent_id

        printdbg("DBContentHandler: Looking for path %s in cache", (path,))
        # First of all look at the cache
        try:
            return self.file_cache[path]
        except KeyError:
            pass

        # It's not in the cache; look now at the moves cache
        try:
            retval = self.__get_file_from_moves_cache(path)
            printdbg("DBContentHandler: Found %s in moves cache", (path,))
            self.file_cache[path] = retval
            return retval
        except FileNotInCache:
            pass

        # If it's an old file (that is, the path has been
        # taken from the "from" part of an action that
        # has two paths) it might have been deleted or replaced
        if old:
            try:
                return self.deletes_cache[path]
            except KeyError:
                pass

        # It hasn't been moved (nor have any of its parents),
        # so it was copied at some point
        return ensure_path(path, commit_id)
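
The lookup cascade described in the docstring (file cache, then moves cache, then deletes cache, then the database) can be summarised in a few lines. This is only an illustrative sketch that mirrors the cache names above; it is not code from DBContentHandler:

def lookup_file(path, file_cache, moves_cache, deletes_cache, ensure_path):
    # 1. Plain cache hit: the path has already been resolved
    if path in file_cache:
        return file_cache[path]
    # 2. The path (or one of its parents) was moved; reuse the target ids
    if path in moves_cache:
        file_cache[path] = moves_cache[path]
        return file_cache[path]
    # 3. The path was deleted, possibly in another branch, but is referenced
    #    here (the real handler only checks this for "old" paths)
    if path in deletes_cache:
        return deletes_cache[path]
    # 4. Unknown path: create the nodes in the database and cache them
    return ensure_path(path)
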
Ejemplo n.º 36
0
    def _parse_line(self, line):
        if not line:
            if self.commit is not None and self.state == SVNParser.COMMIT \
            or self.state == SVNParser.FILES:
                self.state = SVNParser.MESSAGE
            elif self.state == SVNParser.MESSAGE:
                self.__append_message_line()

            return

        # Message
        if self.state == SVNParser.MESSAGE and self.msg_lines > 0:
            self.__append_message_line(line)

            return

        # Invalid commit. Some svn repos like asterisk have commits like this:
        # r176840 | (no author) | (no date) | 1 line
        # without any changed path, so I think we can just ignore them
        if self.patterns['invalid'].match(line):
            printdbg("SVN Parser: skipping invalid commit: %s", (line, ))
            self.state = SVNParser.COMMIT
            self.commit = None
            return

        # Separator
        if self.patterns['separator'].match(line):
            if self.commit is None or self.state == SVNParser.COMMIT:
                return
            elif self.state == SVNParser.MESSAGE \
            or self.state == SVNParser.FILES:
                # We can go directly from FILES to COMMIT
                # when there is an empty log message
                if self.msg_lines > 0:
                    printout("Warning (%d): parsing svn log, missing " + \
                             "lines in commit message!", (self.n_line,))

                self.__convert_commit_actions(self.commit)
                self.handler.commit(self.commit)
                self.state = SVNParser.COMMIT
                self.commit = None
                self.msg_lines = 0
            else:
                printout("Warning (%d): parsing svn log, unexpected separator",
                         (self.n_line, ))

            return

        # Commit
        match = self.patterns['commit'].match(line)
        if match and self.state == SVNParser.COMMIT:
            commit = Commit()
            commit.revision = match.group(1)

            commit.committer = Person()
            commit.committer.name = match.group(2)

            commit.date = datetime.datetime(int(match.group(3)),
                                            int(match.group(4)),
                                            int(match.group(5)),
                                            int(match.group(6)),
                                            int(match.group(7)),
                                            int(match.group(8)))
            self.msg_lines = int(match.group(10))
            self.commit = commit
            self.handler.committer(commit.committer)

            return
        elif match and self.state == SVNParser.MESSAGE:
            # It seems a piece of a log message has been copied as
            # part of the commit message
            self.commit.message += line + '\n'
            return
        elif match and self.state != SVNParser.COMMIT:
            printout("Warning (%d): parsing svn log, unexpected line %s",
                     (self.n_line, line))
            return

        # Files
        if self.state == SVNParser.COMMIT:
            if self.patterns['paths'].match(line):
                self.state = SVNParser.FILES
            else:
                printout("Warning(%d): parsing svn log, unexpected line %s",
                         (self.n_line, line))

            return

        # File moved/copied/replaced
        match = self.patterns['file-moved'].match(line)
        if match:
            if self.state != SVNParser.FILES:
                printout("Warning (%d): parsing svn log, unexpected line %s",
                         (self.n_line, line))
                return

            action = Action()
            action.type = match.group(1)
            action.f1 = match.group(2)
            action.f2 = match.group(3)
            action.rev = match.group(4)

            action.branch_f1 = self.__guess_branch_from_path(action.f1)
            action.branch_f2 = self.__guess_branch_from_path(action.f2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            if self.state != SVNParser.FILES:
                printout("Warning (%d): parsing svn log, unexpected line %s",
                         (self.n_line, line))
                return

            path = match.group(2)

            if path != '/':
                # path == '/' is probably a properties change in /
                # not interesting for us, ignoring

                action = Action()
                action.type = match.group(1)
                action.f1 = path

                action.branch_f1 = self.__guess_branch_from_path(path)

                self.commit.actions.append(action)
                self.handler.file(path)

            return
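
The COMMIT/FILES/MESSAGE states above follow the structure of "svn log -v" output. The sketch below gives a rough idea of the patterns such a parser relies on; these regexes are illustrative approximations, not the exact ones used by SVNParser:

import re

svn_log_patterns = {
    # r1234 | jsmith | 2009-05-01 12:34:56 +0000 (Fri, 01 May 2009) | 2 lines
    'commit': re.compile(r"^r(\d+) \| (.*) \| (\d+)-(\d+)-(\d+) "
                         r"(\d+):(\d+):(\d+) ([+-]\d+) \(.*\) \| (\d+) lines?$"),
    # The 72-dash separator between log entries
    'separator': re.compile(r"^-{72}$"),
    # Header of the changed-paths block
    'paths': re.compile(r"^Changed paths:$"),
    #    A /tags/1.0 (from /trunk:1233)   -- must be checked before 'file'
    'file-moved': re.compile(r"^[ ]+([MADR]) (.+) \(from (.+):(\d+)\)$"),
    #    M /trunk/src/main.c
    'file': re.compile(r"^[ ]+([MADR]) (.+)$"),
}
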
Ejemplo n.º 37
0
    def __get_file_for_path(self, path, commit_id, old=True):
        """Get a pair of (node_id, parent_id) regarding a path.
           First, it looks at file_cache, the at the moves cache,
           then at the deleted cache and finally, when it is not
           found in the cache, it is added and linked in the
           database.
        """
        def ensure_path(path, commit_id):
            profiler_start("Ensuring path %s for repository %d",
                           (path, self.repo_id))
            printdbg("DBContentHandler: ensure_path %s", (path, ))

            prefix, lpath = path.split("://", 1)
            prefix += "://"
            tokens = lpath.strip('/').split('/')

            parent = -1
            node_id = None
            for i, token in enumerate(tokens):
                file_path = '/'.join(tokens[:i + 1])
                rpath = prefix + '/' + file_path
                if not ":///" in path:
                    # If the repo paths don't start with /
                    # remove it here
                    rpath = rpath.replace(':///', '://')
                printdbg("DBContentHandler: rpath: %s", (rpath, ))
                try:
                    node_id, parent_id = self.file_cache[rpath]
                    parent = node_id
                    continue
                except KeyError:
                    pass

                # Rpath not in cache, add it
                node_id = self.__add_new_file_and_link(token, parent,
                                                       commit_id, file_path)
                parent_id = parent
                parent = node_id

                self.file_cache[rpath] = (node_id, parent_id)

            assert node_id is not None

            printdbg("DBContentHandler: path ensured %s = %d (%d)",
                     (path, node_id, parent_id))
            profiler_stop("Ensuring path %s for repository %d",
                          (path, self.repo_id), True)

            return node_id, parent_id

        printdbg("DBContentHandler: Looking for path %s in cache", (path, ))
        # First of all look at the cache
        try:
            return self.file_cache[path]
        except KeyError:
            pass

        # It's not in the cache; look now at the moves cache
        try:
            retval = self.__get_file_from_moves_cache(path)
            printdbg("DBContentHandler: Found %s in moves cache", (path, ))
            self.file_cache[path] = retval
            return retval
        except FileNotInCache:
            pass

        # Due to branching, the file may have been deleted in other branches,
        # and thus be in deletes_cache. Unless the action is an 'A' (add),
        # where we are pretty sure it is a new file, we should always look
        # at the deletes_cache for the file_id
        if old and path in self.deletes_cache:
            return self.deletes_cache[path]

        # It hasn't been moved (nor have any of its parents),
        # so it was copied at some point
        return ensure_path(path, commit_id)
Ejemplo n.º 38
0
 def backout_extensions(self, repo, uri, db):
     printdbg("Called backout extensions")
     self.run_extensions(repo, uri, db, backout=True)
Ejemplo n.º 39
0
        config.bug_fix_regexes_case_sensitive = \
            bug_fix_regexes_case_sensitive

    if not config.extensions and config.no_parse:
        # Do nothing!!!
        return 0

    if config.debug:
        import repositoryhandler
        repositoryhandler.backends.DEBUG = True

    path = uri_to_filename(uri)
    (uri, repo) = _get_uri_and_repo(path)

    if not config.no_parse:
        printdbg("Preparing logging")
        # Create reader
        reader = LogReader()
        reader.set_repo(repo, path or uri)
        reader.set_branch(config.branch)

        # Create parser
        if config.repo_logfile is not None:
            parser = create_parser_from_logfile(config.repo_logfile)
            reader.set_logfile(config.repo_logfile)
        else:
            parser = _get_parser_from_repository(repo)

        parser.set_repository(repo, uri)

        if parser is None:
Ejemplo n.º 40
0
 def backout_extensions(self, repo, uri, db):
     printdbg("Called backout extensions")
     self.run_extensions(repo, uri, db, backout=True)
Ejemplo n.º 41
0
    """
    # Note: Default values for options are defined in the
    # configuration module
    usage = 'Usage: %prog [options]'

    try:
        Config.set_config_options(usage)
    except (ErrorLoadingConfig, InvalidConfig), e:
        printerr(str(e))
        sys.exit(2)

    try:
        backend = Backend.create_backend(Config.backend)
    except ImportError, e:
        printerr("Backend ''" + Config.backend + "'' doesn't exist. " + str(e))
        sys.exit(2)
    printdbg("Bicho object created, options and backend initialized")
    backend.run()

    if Config.logtable:
        try:
            ilogger = IssueLogger.create_logger(Config.backend)
        except ImportError, e:
            printerr("Logger ''" + Config.backend + "'' doesn't exist. " + str(e))
            sys.exit(2)
        printdbg("Bicho logger object created")
        ilogger.run()

if __name__ == "__main__":
    main()
Ejemplo n.º 42
0
    def _parse_line(self, line):
        if line is None or line == '':
            return

        # Ignore
        for patt in self.patterns['ignore']:
            if patt.match(line):
                return

        # Commit
        match = self.patterns['commit'].match(line)
        if match:
            if self.commit is not None:
                # Skip commits on svn tags
                if self.branch.tail.svn_tag is None:
                    self.handler.commit(self.branch.tail.commit)

            self.commit = Commit()
            self.commit.revision = match.group(1)

            parents = match.group(3)
            if parents:
                parents = parents.split()
            git_commit = self.GitCommit(self.commit, parents)

            # If a specific branch has been configured, there
            # won't be any decoration, so a branch needs to be
            # created
            if Config().branch is not None:
                self.branch = self.GitBranch(self.GitBranch.LOCAL,
                                             Config().branch,
                                             git_commit)

            decorate = match.group(5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search(self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.REMOTE, m.group(1),
                                            git_commit)
                    printdbg("Branch '%s' head at acommit %s",
                             (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search(self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.LOCAL,
                                                m.group(1), git_commit)
                        printdbg("Commit %s on local branch '%s'",
                                 (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this
                        # decoration
                        if self.branch and \
                        self.branch.is_my_parent(git_commit):
                            printdbg("Local branch '%s' was merged",
                                     (branch.name,))
                            branch = None
                    else:
                        # Stash
                        m = re.search(self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch(self.GitBranch.STASH,
                                                    "stash", git_commit)
                            printdbg("Commit %s on stash",
                                     (self.commit.revision,))
                # Tag
                m = re.search(self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group(1)]
                    printdbg("Commit %s tagged as '%s'",
                             (self.commit.revision, self.commit.tags[0]))

            if branch is not None and self.branch is not None:
                # Detect empty branches. Ideally, the head of a branch
                # can't have children. When this happens it is because the
                # branch is empty, so we just ignore such branches
                if self.branch.is_my_parent(git_commit):
                    printout("Warning: Detected empty branch '%s', " + \
                             "it'll be ignored", (branch.name,))
                    branch = None

            if len(self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate(self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent(git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg("Start point of branch '%s' at commit %s",
                                 (self.branches[0].name, self.commit.revision))
                        self.branches.pop(0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and \
            self.branch.is_my_parent(git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg("Move pending tag '%s' from previous commit %s " + \
                         "to current %s", (pending_tag,
                                           self.branch.tail.commit.revision,
                                           self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append(pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.is_remote() and branch.name == 'master':
                    self.branches.append(self.branch)
                else:
                    self.branches.insert(0, self.branch)
            else:
                self.branch.set_tail(git_commit)

            if parents and len(parents) > 1 and not Config().analyze_merges:
                # Skip merge commits
                self.commit = None

            return
        elif self.commit is None:
            return

        # Committer
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)

            return

        # Author
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)

            return

        # Commit Date
        match = self.patterns['commit-date'].match(line)
        if match:
            self.commit.commit_date = datetime.datetime(*(time.strptime(\
                match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))

            return

        # Author Date
        match = self.patterns['author-date'].match(line)
        if match:
            self.commit.author_date = datetime.datetime(*(time.strptime(\
                match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))

            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            action = Action()
            action.type = match.group(1)
            action.f1 = match.group(2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # File moved/copied
        match = self.patterns['file-moved'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group(3)
            action.f2 = match.group(2)
            action.rev = self.commit.revision

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # Message
        self.commit.message += line + '\n'

        assert True, "Not match for line %s" % (line)
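
The branch bookkeeping above relies on small GitCommit/GitBranch helpers providing is_my_parent() and set_tail(). A simplified sketch of what they could look like follows; the field layout and parent matching by revision are assumptions, not necessarily how GitParser implements them:

class GitCommit(object):
    def __init__(self, commit, parents):
        self.commit = commit            # the Commit object being built
        self.parents = parents or []    # parent revision hashes
        self.svn_tag = None             # pending svn tag, moved between commits


class GitBranch(object):
    REMOTE, LOCAL, STASH = range(3)

    def __init__(self, type, name, tail):
        self.type = type
        self.name = name
        self.tail = tail                # most recent GitCommit seen on the branch

    def is_remote(self):
        return self.type == self.REMOTE

    def is_my_parent(self, git_commit):
        # The branch continues (or is merged) at git_commit when that commit
        # lists the current tail as one of its parents.
        return bool(git_commit.parents) and \
            self.tail.commit.revision in git_commit.parents

    def set_tail(self, git_commit):
        self.tail = git_commit
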
Ejemplo n.º 43
0
        config.bug_fix_regexes_case_sensitive = \
            bug_fix_regexes_case_sensitive

    if not config.extensions and config.no_parse:
        # Do nothing!!!
        return 0

    if config.debug:
        import repositoryhandler
        repositoryhandler.backends.DEBUG = True

    path = uri_to_filename(uri)
    (uri, repo) = _get_uri_and_repo(path)

    if not config.no_parse:
        printdbg("Preparing logging")
        # Create reader
        reader = LogReader()
        reader.set_repo(repo, path or uri)
        reader.set_branch(config.branch)

        # Create parser
        if config.repo_logfile is not None:
            parser = create_parser_from_logfile(config.repo_logfile)
            reader.set_logfile(config.repo_logfile)
        else:
            parser = _get_parser_from_repository(repo)

        parser.set_repository(repo, uri)

        if parser is None:
Ejemplo n.º 44
0
    def commit(self, commit):
        if commit.revision in self.revision_cache:
            return

        profiler_start("New commit %s for repository %d", (commit.revision,
                                                           self.repo_id))

        log = DBLog(None, commit)
        log.repository_id = self.repo_id
        self.revision_cache[commit.revision] = log.id

        log.committer = self.__get_person(commit.committer)

        if commit.author == commit.committer:
            log.author = log.committer
        elif commit.author is not None:
            log.author = self.__get_person(commit.author)

        self.commits.append(log)

        printdbg("DBContentHandler: commit: %d rev: %s", (log.id, log.rev))

        # TODO: sort actions? R, A, D, M, V, C
        for action in commit.actions:
            printdbg("DBContentHandler: Action: %s", (action.type,))
            dbaction = DBAction(None, action.type)
            dbaction.commit_id = log.id

            branch = commit.branch or action.branch_f1
            branch_id = self.__get_branch(branch)
            dbaction.branch_id = branch_id

            prefix = "%d://" % (branch_id)
            path = prefix + action.f1

            if action.type == 'A':
                # A file has been added
                file_id = self.__action_add(path, prefix, log)
            elif action.type == 'M':
                # A file has been modified
                file_id = self.__get_file_for_path(path, log.id)[0]
            elif action.type == 'D':
                # A file has been deleted
                file_id = self.__action_delete(path, log)
            elif action.type == 'V':
                # A file has been renamed
                file_id = self.__action_rename(path, prefix, log, action,
                                               dbaction)
            elif action.type == 'C':
                # A file has been copied
                file_id = self.__action_copy(path, prefix, log, action,
                                             dbaction)
            elif action.type == 'R':
                # A file has been replaced
                file_id = self.__action_replace(path, prefix, log, action,
                                                dbaction)
                if file_id is None:
                    continue
            else:
                assert "Unknown action type %s" % (action.type)

            dbaction.file_id = file_id
            self.actions.append(dbaction)

        # Tags
        if commit.tags is not None:
            tag_revs = []
            for tag in commit.tags:
                tag_id = self.__get_tag(tag)
                db_tagrev = DBTagRev(None)
                tag_revs.append((db_tagrev.id, tag_id, log.id))

            self.cursor.executemany(statement(DBTagRev.__insert__,
                                              self.db.place_holder), tag_revs)

        if len(self.actions) >= self.MAX_ACTIONS:
            printdbg("DBContentHandler: %d actions inserting",
                     (len(self.actions),))
            self.__insert_many()

        profiler_stop("New commit %s for repository %d", (commit.revision,
                                                          self.repo_id), True)
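
Actions are buffered in self.actions and written out with a single executemany() once MAX_ACTIONS is reached, which keeps database round trips low. A minimal sketch of that batching idea, assuming a generic DB-API cursor and an illustrative INSERT statement rather than the real schema:

class ActionBuffer(object):
    """Collect rows and flush them to the database in batches."""

    MAX_ACTIONS = 100     # illustrative threshold; the handler defines its own

    def __init__(self, cursor, insert_sql):
        self.cursor = cursor
        self.insert_sql = insert_sql
        self.rows = []

    def add(self, row):
        self.rows.append(row)
        if len(self.rows) >= self.MAX_ACTIONS:
            self.flush()

    def flush(self):
        if not self.rows:
            return
        # One executemany() per batch instead of one execute() per action
        self.cursor.executemany(self.insert_sql, self.rows)
        self.rows = []
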
Ejemplo n.º 45
0
    def _parse_line(self, line):
        if not line:
            if self.commit is not None and self.state == SVNParser.COMMIT \
            or self.state == SVNParser.FILES:
                self.state = SVNParser.MESSAGE
            elif self.state == SVNParser.MESSAGE:
                self.__append_message_line()
                
            return

        # Message
        if self.state == SVNParser.MESSAGE and self.msg_lines > 0:
            self.__append_message_line(line)

            return
        
        # Invalid commit. Some svn repos like asterisk have commits like this:
        # r176840 | (no author) | (no date) | 1 line
        # without any changed path, so I think we can just ignore them
        if self.patterns['invalid'].match(line):
            printdbg("SVN Parser: skipping invalid commit: %s", (line,))
            self.state = SVNParser.COMMIT
            self.commit = None
            return
        
        # Separator
        if self.patterns['separator'].match(line):
            if self.commit is None or self.state == SVNParser.COMMIT:
                return
            elif self.state == SVNParser.MESSAGE \
            or self.state == SVNParser.FILES:
                # We can go directly from FILES to COMMIT
                # when there is an empty log message
                if self.msg_lines > 0:
                    printout("Warning (%d): parsing svn log, missing " + \
                             "lines in commit message!", (self.n_line,))
                
                self.__convert_commit_actions(self.commit)
                self.handler.commit(self.commit)
                self.state = SVNParser.COMMIT
                self.commit = None
                self.msg_lines = 0
            else:
                printout("Warning (%d): parsing svn log, unexpected separator", 
                         (self.n_line,))
                
            return

        # Commit
        match = self.patterns['commit'].match(line)
        if match and self.state == SVNParser.COMMIT:
            commit = Commit()
            commit.revision = match.group(1)
            
            commit.committer = Person()
            commit.committer.name = match.group(2)
            
            commit.commit_date = datetime.datetime(int(match.group(3)),
                                                   int(match.group(4)),
                                                   int(match.group(5)),
                                                   int(match.group(6)),
                                                   int(match.group(7)),
                                                   int(match.group(8)))
            self.msg_lines = int(match.group(10))
            self.commit = commit
            self.handler.committer(commit.committer)
            
            return
        elif match and self.state == SVNParser.MESSAGE:
            # It seems a piece of a log message has been copied as
            # part of the commit message
            self.commit.message += line + '\n'
            return
        elif match and self.state != SVNParser.COMMIT:
            printout("Warning (%d): parsing svn log, unexpected line %s", 
                     (self.n_line, line))
            return

        # Files
        if self.state == SVNParser.COMMIT:
            if self.patterns['paths'].match(line):
                self.state = SVNParser.FILES
            else:
                printout("Warning(%d): parsing svn log, unexpected line %s", 
                         (self.n_line, line))

            return
        
        # File moved/copied/replaced
        match = self.patterns['file-moved'].match(line)
        if match:
            if self.state != SVNParser.FILES:
                printout("Warning (%d): parsing svn log, unexpected line %s", 
                         (self.n_line, line))
                return
            
            action = Action()
            action.type = match.group(1)
            action.f1 = match.group(2)
            action.f2 = match.group(3)
            action.rev = match.group(4)

            action.branch_f1 = self.__guess_branch_from_path(action.f1)
            action.branch_f2 = self.__guess_branch_from_path(action.f2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            if self.state != SVNParser.FILES:
                printout("Warning (%d): parsing svn log, unexpected line %s", 
                         (self.n_line, line))
                return
            
            path = match.group(2)

            if path != '/':
                # path == '/' is probably a properties change in /
                # not interesting for us, ignoring

                action = Action()
                action.type = match.group(1)
                action.f1 = path

                action.branch_f1 = self.__guess_branch_from_path(path)

                self.commit.actions.append(action)
                self.handler.file(path)

            return
Ejemplo n.º 46
0
    def __get_file_for_path(self, path, commit_id, old=True):
        """Get a pair of (node_id, parent_id) regarding a path.
           First, it looks at file_cache, the at the moves cache,
           then at the deleted cache and finally, when it is not
           found in the cache, it is added and linked in the
           database.
        """

        def ensure_path(path, commit_id):
            profiler_start("Ensuring path %s for repository %d", (path, self.repo_id))
            printdbg("DBContentHandler: ensure_path %s", (path,))

            prefix, lpath = path.split("://", 1)
            prefix += "://"
            tokens = lpath.strip('/').split('/')

            parent = -1
            node_id = None
            for i, token in enumerate(tokens):
                file_path = '/'.join(tokens[:i + 1])
                rpath = prefix + '/' + file_path
                if not ":///" in path:
                    # If the repo paths don't start with /
                    # remove it here
                    rpath = rpath.replace(':///', '://')
                printdbg("DBContentHandler: rpath: %s", (rpath,))
                try:
                    node_id, parent_id = self.file_cache[rpath]
                    parent = node_id
                    continue
                except KeyError:
                    pass

                # Rpath not in cache, add it
                node_id = self.__add_new_file_and_link(token, parent, commit_id, file_path)
                parent_id = parent
                parent = node_id

                self.file_cache[rpath] = (node_id, parent_id)

            assert node_id is not None

            printdbg("DBContentHandler: path ensured %s = %d (%d)", (path, node_id, parent_id))
            profiler_stop("Ensuring path %s for repository %d", (path, self.repo_id), True)

            return node_id, parent_id

        printdbg("DBContentHandler: Looking for path %s in cache", (path,))
        # First of all look at the cache
        try:
            return self.file_cache[path]
        except KeyError:
            pass

        # It's not in the cache; look now at the moves cache
        try:
            retval = self.__get_file_from_moves_cache(path)
            printdbg("DBContentHandler: Found %s in moves cache", (path,))
            self.file_cache[path] = retval
            return retval
        except FileNotInCache:
            pass

        # Due to branching, the file may have been deleted in other branches,
        # and thus be in deletes_cache. Unless the action is an 'A' (add),
        # where we are pretty sure it is a new file, we should always look
        # at the deletes_cache for the file_id
        if old and path in self.deletes_cache:
            return self.deletes_cache[path]

        # It hasn't been moved (nor have any of its parents),
        # so it was copied at some point
        return ensure_path(path, commit_id)