Beispiel #1
0
    def __insert_many(self):
        """Write the buffered actions and commits, then commit the transaction.

        Returns immediately when both buffers are empty.  Each non-empty
        buffer is inserted with a single ``executemany`` call and then
        reset to an empty list.
        """
        if not (self.actions or self.commits):
            return

        db_cursor = self.cursor

        if self.actions:
            action_rows = []
            for action in self.actions:
                action_rows.append((action.id, action.type, action.file_id,
                                    action.commit_id, action.branch_id))
            actions_label = "Inserting actions for repository %d"
            profiler_start(actions_label, (self.repo_id,))
            insert_actions = statement(DBAction.__insert__,
                                       self.db.place_holder)
            db_cursor.executemany(insert_actions, action_rows)
            self.actions = []
            profiler_stop(actions_label, (self.repo_id,))

        if self.commits:
            commit_rows = []
            for log in self.commits:
                # The message is normalised with to_utf8() and decoded again
                # before it is handed to the driver.
                message = to_utf8(log.message).decode("utf-8")
                commit_rows.append((log.id, log.rev, log.committer,
                                    log.author, log.date, message,
                                    log.composed_rev, log.repository_id))
            commits_label = "Inserting commits for repository %d"
            profiler_start(commits_label, (self.repo_id,))
            insert_commits = statement(DBLog.__insert__, self.db.place_holder)
            db_cursor.executemany(insert_commits, commit_rows)
            self.commits = []
            profiler_stop(commits_label, (self.repo_id,))

        commit_label = "Committing inserts for repository %d"
        profiler_start(commit_label, (self.repo_id,))
        self.cnn.commit()
        profiler_stop(commit_label, (self.repo_id,))
    def __insert_many(self):
        """Insert every buffered action, one ``execute`` call per action.

        Returns immediately when both ``self.actions`` and ``self.commits``
        are empty.  Only the action buffer is written and cleared by the
        code in this method.

        On a MySQL backend a duplicate-key error (MySQL error 1062) is
        skipped so the remaining actions are still inserted.  Any other
        integrity error is re-raised instead of being silently dropped.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id, ))

            # Decide once which exceptions may be tolerated.  An empty tuple
            # in an ``except`` clause matches nothing, so other backends
            # propagate every error unchanged.
            if isinstance(self.db, MysqlDatabase):
                import MySQLdb
                tolerated_errors = (MySQLdb.IntegrityError, )
            else:
                tolerated_errors = ()

            for a in self.actions:
                action_tuple = (a.id, a.type, a.file_id, a.commit_id,
                                a.branch_id, a.current_file_path)
                try:
                    cursor.execute(
                        statement(DBAction.__insert__, self.db.place_holder),
                        action_tuple)
                except tolerated_errors as e:
                    # 1062 is MySQL's "Duplicate entry" error; the row is
                    # already there, so it is safe to skip.
                    if e.args[0] != 1062:
                        raise
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id, ))
    def __insert_many(self):
        """Insert every buffered action, one ``execute`` call per action.

        Returns immediately when both ``self.actions`` and ``self.commits``
        are empty.  Only the action buffer is written and cleared by the
        code in this method.

        On a MySQL backend a duplicate-key error (MySQL error 1062) is
        skipped so the remaining actions are still inserted.  Any other
        integrity error is re-raised instead of being silently dropped.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id,))

            # The backend does not change while looping, so the check and
            # the driver import are done once.  ``except ()`` matches no
            # exception, which keeps non-MySQL backends strict.
            if isinstance(self.db, MysqlDatabase):
                import MySQLdb
                tolerated_errors = (MySQLdb.IntegrityError,)
            else:
                tolerated_errors = ()

            for a in self.actions:
                action_tuple = (a.id, a.type, a.file_id, a.commit_id,
                                a.branch_id, a.current_file_path)
                try:
                    cursor.execute(statement(DBAction.__insert__,
                                             self.db.place_holder),
                                   action_tuple)
                except tolerated_errors as e:
                    # Only "Duplicate entry" (1062) may be ignored.
                    if e.args[0] != 1062:
                        raise
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id,))
Beispiel #4
0
        def ensure_person(person):
            """Return the ``people`` row id for ``person``, inserting if absent.

            The lookup is done by name only.  When no row matches, a new
            ``DBPerson`` is created and inserted with the person's e-mail.
            """
            timer_label = "Ensuring person %s for repository %d"
            profiler_start(timer_label, (person.name, self.repo_id))
            printdbg("DBContentHandler: ensure_person %s <%s>",
                     (person.name, person.email))
            db_cursor = self.cursor

            encoded_name = to_utf8(person.name)
            address = person.email
            if address is not None:
                address = to_utf8(address).decode("utf-8")

            lookup_sql = statement("SELECT id from people where name = ?",
                                   self.db.place_holder)
            db_cursor.execute(lookup_sql,
                              (to_utf8(encoded_name).decode("utf-8"),))
            row = db_cursor.fetchone()

            if row:
                person_id = row[0]
            else:
                new_person = DBPerson(None, person)
                insert_sql = statement(DBPerson.__insert__,
                                       self.db.place_holder)
                db_cursor.execute(
                    insert_sql,
                    (new_person.id,
                     to_utf8(new_person.name).decode("utf-8"),
                     address))
                person_id = new_person.id

            profiler_stop(timer_label, (person.name, self.repo_id), True)

            return person_id
Beispiel #5
0
    def __insert_many(self):
        """Write buffered actions, commits and issue links, then commit.

        Returns immediately when both buffers are empty.  Actions and
        commits are each inserted with one ``executemany`` call.  For every
        issue key found in a commit message (``OA-<n>`` or ``CCIESC-<n>``,
        case-insensitive) one ``DBIssueCommitLink`` row is inserted as
        ``(commit id, issue key)``.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            actions = [(a.id, a.type, a.file_id, a.commit_id, a.branch_id)
                       for a in self.actions]
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id,))
            cursor.executemany(statement(DBAction.__insert__,
                                         self.db.place_holder), actions)
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id,))
        if self.commits:
            commits = [
                (c.id, c.rev, c.committer, c.author, c.date, c.date_tz,
                 c.author_date, c.author_date_tz, c.message, c.composed_rev,
                 c.repository_id)
                for c in self.commits]
            profiler_start("Inserting commits for repository %d",
                           (self.repo_id,))
            cursor.executemany(statement(DBLog.__insert__,
                                         self.db.place_holder), commits)
            # Raw string so that ``\d`` reaches the regex engine untouched.
            issue_key = re.compile(r'((?:(?:OA)|(?:CCIESC))-\d+)',
                                   re.IGNORECASE)
            for c in self.commits:
                # Same id and message that were put into the tuples above.
                for bug in issue_key.findall(c.message):
                    issue_commit_link = (c.id, bug)
                    cursor.execute(statement(DBIssueCommitLink.__insert__,
                                             self.db.place_holder),
                                   issue_commit_link)
            self.commits = []
            profiler_stop("Inserting commits for repository %d",
                          (self.repo_id,))

        profiler_start("Committing inserts for repository %d",
                       (self.repo_id,))
        self.cnn.commit()
        profiler_stop("Committing inserts for repository %d",
                      (self.repo_id,))
    def repository(self, uri):
        """Select the repository stored under ``uri`` and validate its cache.

        Sets ``self.repo_id`` and ``self.cache_file``, then compares the
        ``scmlog`` row with the highest id for this repository against the
        on-disk cache file:

        * cache file and database row present: the cached commit id of the
          last revision must equal the database id;
        * cache file present, no database row: the in-memory caches are
          reset and the cache file is removed;
        * database row present, no cache file: treated as a mismatch.

        Raises:
            CacheFileMismatch: the cache file and the database disagree.
        """
        cursor = self.cursor
        cursor.execute(statement("SELECT id from repositories where uri = ?",
                                 self.db.place_holder), (uri,))
        # NOTE: assumes the repository row already exists; fetchone()
        # returns None for an empty result and indexing it would fail.
        self.repo_id = cursor.fetchone()[0]

        last_rev = last_commit = None
        query = ("SELECT rev, id from scmlog "
                 "where id = (select max(id) from scmlog "
                 "where repository_id = ?)")
        cursor.execute(statement(query, self.db.place_holder),
                       (self.repo_id,))
        rs = cursor.fetchone()
        if rs is not None:
            last_rev, last_commit = rs

        filename = uri.replace("/", "_")
        self.cache_file = os.path.join(cvsanaly_cache_dir(), filename)

        # if there's a previous cache file, just use it
        if os.path.isfile(self.cache_file):
            self.__load_caches_from_disk()

            if last_rev is not None:
                try:
                    commit_id = self.revision_cache[last_rev]
                except KeyError:
                    msg = (
                        "Cache file %s is not up to date or it's corrupt: "
                        "Revision %s was not found in the cache file. "
                        "It's not possible to continue, the cache "
                        "file should be removed and the database cleaned up"
                        % (self.cache_file, last_rev)
                    )
                    raise CacheFileMismatch(msg)
                if commit_id != last_commit:
                    # Cache and db don't match: refuse to continue
                    msg = (
                        "Cache file %s is not up to date or it's corrupt: "
                        "Commit id mismatch for revision %s "
                        "(File Cache:%d, Database: %d). "
                        "It's not possible to continue, the cache "
                        "file should be removed and the database cleaned up"
                        % (self.cache_file, last_rev, commit_id, last_commit)
                    )
                    raise CacheFileMismatch(msg)
            else:
                # Database looks empty (or corrupt) and we have
                # a cache file. We can just remove it and continue
                # normally
                self.__init_caches()
                os.remove(self.cache_file)
                printout("Database looks empty, removing cache file %s",
                         (self.cache_file,))
        elif last_rev is not None:
            # There are data in the database,
            # but we don't have a cache file!!!
            msg = (
                "Cache file %s is not up to date or it's corrupt: "
                "Cache file cannot be found. "
                "It's not possible to continue, the database "
                "should be cleaned up"
                % (self.cache_file,)
            )
            raise CacheFileMismatch(msg)
Beispiel #7
0
    def repository(self, uri):
        """Select the repository stored under ``uri`` and validate its cache.

        Sets ``self.repo_id`` and ``self.cache_file``, then compares the
        ``scmlog`` row with the highest id for this repository against the
        on-disk cache file:

        * cache file and database row present: the cached commit id of the
          last revision must equal the database id;
        * cache file present, no database row: the in-memory caches are
          reset and the cache file is removed;
        * database row present, no cache file: treated as a mismatch.

        Raises:
            CacheFileMismatch: the cache file and the database disagree.
        """
        cursor = self.cursor
        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      self.db.place_holder), (uri, ))
        # NOTE: assumes the repository row already exists; fetchone()
        # returns None for an empty result and indexing it would fail.
        self.repo_id = cursor.fetchone()[0]

        last_rev = last_commit = None
        query = "SELECT rev, id from scmlog " + \
                "where id = (select max(id) from scmlog where repository_id = ?)"
        cursor.execute(statement(query, self.db.place_holder),
                       (self.repo_id, ))
        rs = cursor.fetchone()
        if rs is not None:
            last_rev, last_commit = rs

        filename = uri.replace('/', '_')
        self.cache_file = os.path.join(cvsanaly_cache_dir(), filename)

        # Shared first and last parts of the mismatch messages.  Every
        # sentence now ends with ". " so consecutive sentences no longer
        # run into each other.
        corrupt = "Cache file %s is not up to date or it's corrupt: " % (
            self.cache_file)
        cleanup = "It's not possible to continue, the cache " + \
                  "file should be removed and the database cleaned up"

        # if there's a previous cache file, just use it
        if os.path.isfile(self.cache_file):
            self.__load_caches_from_disk()

            if last_rev is not None:
                try:
                    commit_id = self.revision_cache[last_rev]
                except KeyError:
                    msg = corrupt + \
                          "Revision %s was not found in the cache file. " % (
                              last_rev) + \
                          cleanup
                    raise CacheFileMismatch(msg)
                if commit_id != last_commit:
                    # Cache and db don't match: refuse to continue
                    msg = corrupt + \
                          "Commit id mismatch for revision %s (File Cache:%d, Database: %d). " % (
                              last_rev, commit_id, last_commit) + \
                          cleanup
                    raise CacheFileMismatch(msg)
            else:
                # Database looks empty (or corrupt) and we have
                # a cache file. We can just remove it and continue
                # normally
                self.__init_caches()
                os.remove(self.cache_file)
                printout("Database looks empty, removing cache file %s",
                         (self.cache_file, ))
        elif last_rev is not None:
            # There are data in the database,
            # but we don't have a cache file!!!
            msg = corrupt + \
                  "Cache file cannot be found. " + \
                  "It's not possible to continue, the database " + \
                  "should be cleaned up"
            raise CacheFileMismatch(msg)
Beispiel #8
0
    def __add_new_file_and_link (self, file_name, parent_id, commit_id, file_path):
        """Insert a new file row plus the link to its parent; return the file id.

        The file is stored for the current repository (``self.repo_id``).
        The link row records ``parent_id``, the new file id, ``commit_id``
        and ``file_path``.
        """
        new_file = DBFile(None, file_name)
        new_file.repository_id = self.repo_id
        insert_file = statement(DBFile.__insert__, self.db.place_holder)
        self.cursor.execute(
            insert_file,
            (new_file.id, new_file.file_name, new_file.repository_id))

        link = DBFileLink(None, parent_id, new_file.id, file_path)
        link.commit_id = commit_id
        insert_link = statement(DBFileLink.__insert__, self.db.place_holder)
        self.cursor.execute(
            insert_link,
            (link.id, link.parent, link.child, link.commit_id,
             link.file_path))

        return new_file.id
Beispiel #9
0
    def foreach(self, cb, order=None):
        """Call ``cb`` once for every commit object stored in ``_temp_log``.

        Pending data is flushed first.  Rows are read on a separate
        connection in chunks of ``self.INTERVAL_SIZE``.

        Args:
            cb: callable invoked with each deserialised commit.
            order: ``None`` or ``ContentHandler.ORDER_REVISION`` iterates by
                descending row id; any other value iterates by ascending
                date.

        The cursor and the connection are closed even when ``cb`` or the
        deserialisation raises.
        """
        self.flush()

        cnn = self.db.connect()
        try:
            if order is None or order == ContentHandler.ORDER_REVISION:
                query = "SELECT object from _temp_log order by id desc"
            else:
                query = "SELECT object from _temp_log order by date asc"

            # We need to split the query to save memory
            icursor = ICursor(cnn.cursor(), self.INTERVAL_SIZE)
            try:
                icursor.execute(statement(query, self.db.place_holder))
                rs = icursor.fetchmany()
                while rs:
                    for t in rs:
                        # NOTE(review): load() is presumably pickle's; it
                        # must only ever see rows written by this program.
                        buf = BytesIO(t[0])
                        try:
                            commit = load(buf)
                        finally:
                            buf.close()
                        cb(commit)

                    rs = icursor.fetchmany()
            finally:
                icursor.close()
        finally:
            cnn.close()
        def ensure_tag(tag):
            """Return the ``tags`` row id for ``tag``, inserting it if absent."""
            timer_label = "Ensuring tag %s for repository %d"
            profiler_start(timer_label, (tag, self.repo_id))
            printdbg("DBContentHandler: ensure_tag %s", (tag,))
            db_cursor = self.cursor

            lookup_sql = statement("SELECT id from tags where name = ?",
                                   self.db.place_holder)
            db_cursor.execute(lookup_sql, (tag,))
            row = db_cursor.fetchone()

            if row:
                tag_id = row[0]
            else:
                # No such tag yet: create it and use the new id.
                new_tag = DBTag(None, tag)
                insert_sql = statement(DBTag.__insert__, self.db.place_holder)
                db_cursor.execute(insert_sql, (new_tag.id, new_tag.name))
                tag_id = new_tag.id

            profiler_stop(timer_label, (tag, self.repo_id), True)

            return tag_id
        def ensure_branch(branch):
            """Return the ``branches`` row id for ``branch``, inserting if absent."""
            timer_label = "Ensuring branch %s for repository %d"
            profiler_start(timer_label, (branch, self.repo_id))
            printdbg("DBContentHandler: ensure_branch %s", (branch,))
            db_cursor = self.cursor

            lookup_sql = statement("SELECT id from branches where name = ?",
                                   self.db.place_holder)
            db_cursor.execute(lookup_sql, (branch,))
            row = db_cursor.fetchone()

            if row:
                branch_id = row[0]
            else:
                # No such branch yet: create it and use the new id.
                new_branch = DBBranch(None, branch)
                insert_sql = statement(DBBranch.__insert__,
                                       self.db.place_holder)
                db_cursor.execute(insert_sql,
                                  (new_branch.id, new_branch.name))
                branch_id = new_branch.id

            profiler_stop(timer_label, (branch, self.repo_id), True)

            return branch_id
Beispiel #12
0
    def __writer(self, queue):
        """Drain ``queue`` and store each commit, serialised, in ``_temp_log``.

        Runs until the first queue item that is not a ``Commit`` instance.
        Rows are inserted and committed in batches of 50; a final partial
        batch is written after the loop.  ``queue.done()`` is called once
        for every item taken from the queue.

        The cursor and the connection are closed even when an insert or the
        serialisation raises.
        """
        batch_size = 50
        cnn = self.db.connect()
        try:
            cursor = cnn.cursor()
            try:
                def store(rows):
                    # One INSERT per row via executemany, then commit.
                    cursor.executemany(
                        statement(
                            "INSERT into _temp_log (rev, date, object) values (?, ?, ?)",
                            self.db.place_holder), rows)
                    cnn.commit()

                commits = []
                while True:
                    commit = queue.get()

                    if not isinstance(commit, Commit):
                        queue.done()
                        break

                    io = StringIO()
                    dump(commit, io, -1)
                    obj = io.getvalue()
                    io.close()

                    commits.append(
                        (commit.revision, commit.date,
                         self.db.to_binary(obj)))
                    # Drop the reference before blocking on the queue again.
                    del commit

                    if len(commits) == batch_size:
                        store(commits)
                        commits = []

                    queue.done()

                if commits:
                    store(commits)
            finally:
                cursor.close()
        finally:
            cnn.close()
Beispiel #13
0
    def __writer(self, queue):
        """Drain ``queue`` and store each commit, serialised, in ``_temp_log``.

        Runs until a ``str`` is received, which is treated as the signal to
        stop.  Rows are inserted and committed in batches of 50; a final
        partial batch is written after the loop.  ``queue.done()`` is
        called once for every item taken from the queue.

        The cursor and the connection are closed even when an insert or the
        serialisation raises.
        """
        batch_size = 50
        cnn = self.db.connect()
        try:
            cursor = cnn.cursor()
            try:
                def store(rows):
                    # One INSERT per row via executemany, then commit.
                    cursor.executemany(statement("INSERT into _temp_log " + \
                        "(rev, date, object) values (?, ?, ?)",
                        self.db.place_holder), rows)
                    cnn.commit()

                commits = []
                while True:
                    commit = queue.get()

                    # If we receive a string, assume it's a kill
                    # signal and end
                    if isinstance(commit, str):
                        queue.done()
                        break

                    io = BytesIO()
                    dump(commit, io, -1)
                    obj = io.getvalue()
                    io.close()

                    commits.append((commit.revision, commit.commit_date,
                                    self.db.to_binary(obj)))
                    # Drop the reference before blocking on the queue again.
                    del commit

                    if len(commits) == batch_size:
                        store(commits)
                        commits = []

                    queue.done()

                if commits:
                    store(commits)
            finally:
                cursor.close()
        finally:
            cnn.close()
Beispiel #14
0
 def __add_new_copy(self, dbfilecopy):
     """Insert ``dbfilecopy`` as one row using ``DBFileCopy.__insert__``."""
     insert_sql = statement(DBFileCopy.__insert__, self.db.place_holder)
     copy_row = (
         dbfilecopy.id,
         dbfilecopy.to_id,
         dbfilecopy.from_id,
         dbfilecopy.from_commit,
         dbfilecopy.new_file_name,
         dbfilecopy.action_id,
     )
     self.cursor.execute(insert_sql, copy_row)
Beispiel #15
0
        def ensure_tag (tag):
            """Look up ``tag`` in the ``tags`` table, creating the row if needed.

            Returns the id of the existing or newly inserted row.
            """
            label = "Ensuring tag %s for repository %d"
            profiler_start(label, (tag, self.repo_id))
            printdbg("DBContentHandler: ensure_tag %s", (tag,))
            cur = self.cursor

            select_sql = statement("SELECT id from tags where name = ?",
                                   self.db.place_holder)
            cur.execute(select_sql, (tag,))
            found = cur.fetchone()

            if found:
                tag_id = found[0]
            else:
                dbtag = DBTag(None, tag)
                cur.execute(statement(DBTag.__insert__, self.db.place_holder),
                            (dbtag.id, dbtag.name))
                tag_id = dbtag.id

            profiler_stop(label, (tag, self.repo_id), True)

            return tag_id
Beispiel #16
0
    def __writer(self, queue):
        """Drain ``queue`` and store each commit, serialised, in ``_temp_log``.

        Runs until the first queue item that is not a ``Commit`` instance.
        Rows are inserted and committed in batches of 50; a final partial
        batch is written after the loop.  ``queue.done()`` is called once
        for every item taken from the queue.

        The cursor and the connection are closed even when an insert or the
        serialisation raises.
        """
        batch_size = 50
        cnn = self.db.connect()
        try:
            cursor = cnn.cursor()
            try:
                def store(rows):
                    # One INSERT per row via executemany, then commit.
                    cursor.executemany(
                        statement("INSERT into _temp_log (rev, date, object) values (?, ?, ?)", self.db.place_holder),
                        rows)
                    cnn.commit()

                commits = []
                while True:
                    commit = queue.get()

                    if not isinstance(commit, Commit):
                        queue.done()
                        break

                    io = StringIO()
                    dump(commit, io, -1)
                    obj = io.getvalue()
                    io.close()

                    commits.append((commit.revision, commit.date, self.db.to_binary(obj)))
                    # Drop the reference before blocking on the queue again.
                    del commit

                    if len(commits) == batch_size:
                        store(commits)
                        commits = []

                    queue.done()

                if commits:
                    store(commits)
            finally:
                cursor.close()
        finally:
            cnn.close()
Beispiel #17
0
        def ensure_branch(branch):
            """Look up ``branch`` in ``branches``, creating the row if needed.

            Returns the id of the existing or newly inserted row.
            """
            label = "Ensuring branch %s for repository %d"
            profiler_start(label, (branch, self.repo_id))
            printdbg("DBContentHandler: ensure_branch %s", (branch,))
            cur = self.cursor

            select_sql = statement("SELECT id from branches where name = ?",
                                   self.db.place_holder)
            cur.execute(select_sql, (branch,))
            found = cur.fetchone()

            if found:
                branch_id = found[0]
            else:
                dbbranch = DBBranch(None, branch)
                cur.execute(statement(DBBranch.__insert__,
                                      self.db.place_holder),
                            (dbbranch.id, dbbranch.name))
                branch_id = dbbranch.id

            profiler_stop(label, (branch, self.repo_id), True)

            return branch_id
Beispiel #18
0
    def __insert_many(self):
        """Write buffered actions, commits and issue links, then commit.

        Returns immediately when both buffers are empty.  Actions and
        commits are each inserted with one ``executemany`` call.  For every
        issue key found in a commit message (``OA-<n>`` or ``CCIESC-<n>``,
        case-insensitive) one ``DBIssueCommitLink`` row is inserted as
        ``(commit id, issue key)``.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            actions = [(a.id, a.type, a.file_id, a.commit_id, a.branch_id)
                       for a in self.actions]
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id, ))
            cursor.executemany(
                statement(DBAction.__insert__, self.db.place_holder), actions)
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id, ))
        if self.commits:
            commits = [(c.id, c.rev, c.committer, c.author, c.date, c.date_tz,
                        c.author_date, c.author_date_tz, c.message,
                        c.composed_rev, c.repository_id) for c in self.commits]
            profiler_start("Inserting commits for repository %d",
                           (self.repo_id, ))
            cursor.executemany(
                statement(DBLog.__insert__, self.db.place_holder), commits)
            # Raw string so that ``\d`` reaches the regex engine untouched.
            issue_key = re.compile(r'((?:(?:OA)|(?:CCIESC))-\d+)',
                                   re.IGNORECASE)
            for c in self.commits:
                # Same id and message that were put into the tuples above.
                for bug in issue_key.findall(c.message):
                    issue_commit_link = (c.id, bug)
                    cursor.execute(
                        statement(DBIssueCommitLink.__insert__,
                                  self.db.place_holder), issue_commit_link)
            self.commits = []
            profiler_stop("Inserting commits for repository %d",
                          (self.repo_id, ))

        profiler_start("Committing inserts for repository %d",
                       (self.repo_id, ))
        self.cnn.commit()
        profiler_stop("Committing inserts for repository %d", (self.repo_id, ))
Beispiel #19
0
    def __add_file_path(self, commit_id, file_id, path):
        """Store the full path of ``file_id`` at ``commit_id`` in file_paths.

        Everything up to and including the first ``://`` is dropped from
        ``path``; a path without that separator is stored unchanged.
        """
        _, separator, remainder = path.partition("://")
        if separator:
            file_path = remainder
        else:
            file_path = path

        record = DBFilePath(None, commit_id, file_id, file_path)
        insert_sql = statement(DBFilePath.__insert__, self.db.place_holder)
        self.cursor.execute(
            insert_sql,
            (record.id, record.commit_id, record.file_id, record.file_path))
Beispiel #20
0
    def __action_rename(self, path, prefix, log, action, dbaction):
        """Process a renamed (or moved) file and return its file id.

        The file keeps its existing id.  A ``DBFileCopy`` row describing
        the rename is always stored, plus a ``file_paths`` entry for the
        new path.  A new ``DBFileLink`` row is written only when the parent
        directory changed.

        Args:
            path: new path of the file, already carrying ``prefix``.
            prefix: path prefix used for the old path when the action does
                not name a source branch.
            log: database log entry of the commit being processed; its
                ``id`` is used for the new link and the file path.
            action: parsed action; ``f2`` is the old path, ``branch_f2`` the
                optional source branch and ``rev`` the source revision.
            dbaction: database action whose id is stored on the copy row.
        """
        new_parent_path = os.path.dirname(path)
        new_file_name = os.path.basename(path)

        # None when the source revision is not in the revision cache
        from_commit_id = self.revision_cache.get(action.rev, None)

        # Build the old path, using the source branch id when one is given
        if action.branch_f2:
            branch_f2_id = self.__get_branch(action.branch_f2)
            old_path = "%d://%s" % (branch_f2_id, action.f2)
        else:
            old_path = prefix + action.f2
        file_id, parent_id = self.__get_file_for_path(old_path,
                                            from_commit_id, True)

        dbfilecopy = DBFileCopy(None, file_id)
        dbfilecopy.action_id = dbaction.id
        dbfilecopy.from_commit = from_commit_id

        # -1 is used when the new path has no directory part below the
        # prefix (presumably the repository root -- confirm)
        if not new_parent_path or new_parent_path == prefix.strip('/'):
            new_parent_id = -1
        else:
            new_parent_id = self.__get_file_for_path(new_parent_path,
                                                     log.id)[0]
        if new_parent_id != parent_id:
            # It's not a simple rename, but a move operation
            # we have to write down the new link
            parent_id = new_parent_id
            dblink = DBFileLink(None, parent_id, file_id)
            dblink.commit_id = log.id
            self.cursor.execute(statement(DBFileLink.__insert__,
                                 self.db.place_holder),
                                (dblink.id, dblink.parent, dblink.child,
                                 dblink.commit_id))
            # Remember where the moved file came from
            self.moves_cache[path] = old_path

        # The new path now resolves to the same file id
        self.file_cache[path] = (file_id, parent_id)

        # Move/rename is a special case of copy: there is no
        # new file_id, so the copy points back at the same file
        dbfilecopy.from_id = file_id
        dbfilecopy.new_file_name = new_file_name
        self.__add_new_copy(dbfilecopy)

        # Save also file_path
        self.__add_file_path(log.id, file_id, path)

        return file_id
Beispiel #21
0
 def do_delete(self, delete_statement, params=None,
               error_message="Delete failed, data needs manual cleanup"):
     """Run ``delete_statement`` on a fresh cursor, logging any failure.

     Args:
         delete_statement: SQL text using ``?`` placeholders.
         params: statement parameters; defaults to ``(self.repo_id,)``.
         error_message: message handed to ``execute_statement``.

     Returns:
         ``True`` when ``self.repo_id`` is ``None`` (nothing to delete);
         otherwise ``None``, whether or not the statement succeeded.
     """
     if self.repo_id is None:
         # Repo wasn't found anyway, so continue
         return True

     # You can't reference instance variables in default
     # parameters, so I have to do this.
     if params is None:
         params = (self.repo_id,)

     # Bound before the try block so the finally clause can tell whether
     # a cursor was actually obtained.
     delete_cursor = None
     try:
         delete_cursor = self.connection.cursor()
         execute_statement(statement(delete_statement,
                                     self.db.place_holder),
                           params, delete_cursor,
                           self.db, error_message)
     except Exception:
         printdbg("Deletion exception")
     finally:
         if delete_cursor is not None:
             delete_cursor.close()
Beispiel #22
0
        printdbg("Tables not created, database already exists")
        db_exists = True
    except DatabaseException, e:
        printerr("Database error: %s", (e.message, ))
        return 1

    if config.no_parse and not db_exists:
        printerr("The option --no-parse must be used with an already " + \
                 "filled database")
        return 1

    # Add repository to Database
    if db_exists:
        printdbg("Database exists, so looking for existing repository")
        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      db.place_holder), (uri, ))
        rep = cursor.fetchone()
        initialize_ids(db, cursor)
        cursor.close()

    if config.no_parse and rep is None:
        printerr("The option --no-parse must be used with an already " + \
                 "filled database")
        return 1

    if not db_exists or rep is None:
        # We consider the name of the repo as the last item of the root path
        name = uri.rstrip("/").split("/")[-1].strip()
        cursor = cnn.cursor()
        rep = DBRepository(None, uri, name, repo.get_type())
        cursor.execute(statement(DBRepository.__insert__, db.place_holder),
Beispiel #23
0
    def commit(self, commit):
        """Buffer one parsed commit and its file actions for insertion.

        A revision already present in ``self.revision_cache`` is ignored.
        Otherwise a ``DBLog`` entry is queued in ``self.commits`` and one
        ``DBAction`` per file action in ``self.actions``.  Tag revisions
        are inserted immediately.  Once at least ``MAX_ACTIONS`` actions
        are buffered, the buffers are flushed.

        Action types handled: ``A`` added, ``M`` modified, ``D`` deleted,
        ``V`` renamed, ``C`` copied, ``R`` replaced.

        Raises:
            ValueError: an action has an unknown type.
        """
        if commit.revision in self.revision_cache:
            return

        profiler_start("New commit %s for repository %d", (commit.revision,
                                                           self.repo_id))

        log = DBLog(None, commit)
        log.repository_id = self.repo_id
        self.revision_cache[commit.revision] = log.id

        log.committer = self.__get_person(commit.committer)

        if commit.author == commit.committer:
            log.author = log.committer
        elif commit.author is not None:
            log.author = self.__get_person(commit.author)

        self.commits.append(log)

        printdbg("DBContentHandler: commit: %d rev: %s", (log.id, log.rev))

        # TODO: sort actions? R, A, D, M, V, C
        for action in commit.actions:
            printdbg("DBContentHandler: Action: %s", (action.type,))
            dbaction = DBAction(None, action.type)
            dbaction.commit_id = log.id

            branch = commit.branch or action.branch_f1
            branch_id = self.__get_branch(branch)
            dbaction.branch_id = branch_id

            prefix = "%d://" % (branch_id)
            path = prefix + action.f1

            if action.type == 'A':
                # A file has been added
                file_id = self.__action_add(path, prefix, log)
            elif action.type == 'M':
                # A file has been modified
                file_id = self.__get_file_for_path(path, log.id)[0]
            elif action.type == 'D':
                # A file has been deleted
                file_id = self.__action_delete(path, log)
            elif action.type == 'V':
                # A file has been renamed
                file_id = self.__action_rename(path, prefix, log, action,
                                               dbaction)
            elif action.type == 'C':
                # A file has been copied
                file_id = self.__action_copy(path, prefix, log, action,
                                             dbaction)
            elif action.type == 'R':
                # A file has been replaced
                file_id = self.__action_replace(path, prefix, log, action,
                                                dbaction)
                if file_id is None:
                    continue
            else:
                # The former ``assert "<message>"`` could never fail, so an
                # unknown type reused the file_id of the previous action.
                raise ValueError("Unknown action type %s" % (action.type,))

            dbaction.file_id = file_id
            self.actions.append(dbaction)

        # Tags
        if commit.tags is not None:
            tag_revs = []
            for tag in commit.tags:
                tag_id = self.__get_tag(tag)
                db_tagrev = DBTagRev(None)
                tag_revs.append((db_tagrev.id, tag_id, log.id))

            self.cursor.executemany(statement(DBTagRev.__insert__,
                                              self.db.place_holder), tag_revs)

        if len(self.actions) >= self.MAX_ACTIONS:
            printdbg("DBContentHandler: %d actions inserting",
                     (len(self.actions),))
            self.__insert_many()

        profiler_stop("New commit %s for repository %d", (commit.revision,
                                                          self.repo_id), True)
Beispiel #24
0
    from io import BytesIO
    from Database import create_database, ICursor

    # Repository URI used for this manual test run.
    uri = "http://svn.test-cvsanaly.org/svn/test"

    # The third argument comes from the command line (presumably the
    # database user -- confirm against create_database).
    db = create_database('mysql', 'dbcontenthandler', sys.argv[1], None,
                         'localhost')
    cnn = db.connect()

    # Tables emptied before the run so it starts from a clean state.
    tables = ['actions', 'branches', 'file_copies', 'file_links', 'files',
              'people', 'repositories', 'scmlog', 'tag_revisions', 'tags']

    cursor = cnn.cursor()
    for table in tables:
        # Table names come from the fixed list above, not from user input.
        query = "delete from %s" % (table)
        cursor.execute(statement(query, db.place_holder))
    cursor.close()
    cnn.commit()

    # The repository name is the last path component of the URI.
    name = uri.rstrip("/").split("/")[-1].strip()
    cursor = cnn.cursor()
    rep = DBRepository(None, uri, name, 'svn')
    cursor.execute(statement(DBRepository.__insert__, db.place_holder),
                   (rep.id, rep.uri, rep.name, rep.type))
    cursor.close()
    cnn.commit()

    # Start the content handler on the repository row inserted above.
    ch = DBContentHandler(db)
    ch.begin()
    ch.repository(uri)
Beispiel #25
0
    except TableAlreadyExists:
        printdbg("Tables not created, database already exists")
        db_exists = True
    except DatabaseException, e:
        printerr("Database error: %s", (e.message,))
        return 1

    if config.no_parse and not db_exists:
        printerr("The option --no-parse must be used with an already " + \
                 "filled database")
        return 1

    # Add repository to Database
    if db_exists:
        printdbg("Database exists, so looking for existing repository")
        cursor.execute(statement("SELECT id from repositories where uri = ?",
                                 db.place_holder), (uri,))
        rep = cursor.fetchone()
        initialize_ids(db, cursor)
        cursor.close()

    if config.no_parse and rep is None:
        printerr("The option --no-parse must be used with an already " + \
                 "filled database")
        return 1

    if not db_exists or rep is None:
        # We consider the name of the repo as the last item of the root path
        name = uri.rstrip("/").split("/")[-1].strip()
        cursor = cnn.cursor()
        rep = DBRepository(None, uri, name, repo.get_type())
        cursor.execute(statement(DBRepository.__insert__, db.place_holder),
Beispiel #26
0
    from cStringIO import StringIO
    from cPickle import dump, load
    from Database import create_database, DBRepository, ICursor

    # Repository URI used for this manual test run.
    uri = "http://svn.test-cvsanaly.org/svn/test"

    # The third argument comes from the command line (presumably the
    # database user -- confirm against create_database).
    db = create_database('mysql', 'dbcontenthandler', sys.argv[1], None, 'localhost')
    cnn = db.connect()

    # Tables emptied before the run so it starts from a clean state.
    tables = ['actions', 'branches', 'file_copies', 'file_links', 'files',
              'people', 'repositories', 'scmlog', 'tag_revisions', 'tags']

    cursor = cnn.cursor()
    for table in tables:
        # Table names come from the fixed list above, not from user input.
        query = "delete from %s" % (table)
        cursor.execute(statement(query, db.place_holder))
    cursor.close()
    cnn.commit()

    # The repository name is the last path component of the URI.
    name = uri.rstrip("/").split("/")[-1].strip()
    cursor = cnn.cursor()
    rep = DBRepository(None, uri, name, 'svn')
    cursor.execute(statement(DBRepository.__insert__, db.place_holder), (rep.id, rep.uri, rep.name, rep.type))
    cursor.close()
    cnn.commit()

    # Start the content handler on the repository row inserted above.
    ch = DBContentHandler(db)
    ch.begin()
    ch.repository(uri)

    # We need to split the query to save memory
class DBContentHandler(ContentHandler):
    """Content handler that stores queued commits and actions in a database.

    The handler keeps several in-memory caches that are pickled to a
    per-repository cache file, and checks that file against the database
    contents when a repository is selected (see repository()).
    """

    # NOTE(review): presumably the number of queued actions that triggers a
    # flush; it is not referenced by the methods visible here -- confirm.
    MAX_ACTIONS = 100

    def __init__(self, db):
        """Create a handler for the database object `db`.

        No connection is opened here; call begin() before using the
        handler.
        """
        ContentHandler.__init__(self)

        self.db = db
        self.cnn = None
        self.cursor = None

        self.__init_caches()

    def __init_caches(self):
        """Reset every in-memory cache to an empty dictionary."""
        self.file_cache = {}
        self.moves_cache = {}
        self.deletes_cache = {}
        # Maps a revision to its commit id (see repository()).
        self.revision_cache = {}
        self.branch_cache = {}
        self.tags_cache = {}
        self.people_cache = {}

    def __save_caches_to_disk(self):
        """Pickle all caches, in a fixed order, into self.cache_file."""
        printdbg("DBContentHandler: Saving caches to disk (%s)",
                 (self.cache_file, ))
        cache = [
            self.file_cache, self.moves_cache, self.deletes_cache,
            self.revision_cache, self.branch_cache, self.tags_cache,
            self.people_cache
        ]
        # Protocol -1 selects a binary pickle format, so the file has to be
        # opened in binary mode; `with` closes it even if dump() fails.
        with open(self.cache_file, 'wb') as f:
            dump(cache, f, -1)

    def __load_caches_from_disk(self):
        """Restore all caches from self.cache_file.

        The order of the unpacked caches must match the order used by
        __save_caches_to_disk().
        """
        printdbg("DBContentHandler: Loading caches from disk (%s)",
                 (self.cache_file, ))
        # NOTE: unpickling executes whatever the file describes; the cache
        # file is trusted here and must never come from an untrusted source.
        # Binary mode matches the binary pickle protocol used when saving.
        with open(self.cache_file, 'rb') as f:
            (self.file_cache, self.moves_cache, self.deletes_cache,
             self.revision_cache, self.branch_cache, self.tags_cache,
             self.people_cache) = load(f)

    def __del__(self):
        """Close the database connection, if one was opened."""
        if self.cnn is not None:
            self.cnn.close()

    def begin(self, order=None):
        """Open the connection and cursor and reset the pending queues.

        `order` is accepted but not used by this implementation.
        """
        self.cnn = self.db.connect()

        self.cursor = self.cnn.cursor()

        self.commits = []
        self.actions = []

    def repository(self, uri):
        """Select the repository stored under `uri` and validate its cache.

        Sets self.repo_id and self.cache_file. When a cache file exists it
        is loaded and checked against the last commit stored in the
        database for this repository.

        Raises:
            CacheFileMismatch: if the database holds commits for the
                repository but the cache file is missing, lacks the last
                revision, or maps it to a different commit id.
        """
        cursor = self.cursor
        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      self.db.place_holder), (uri, ))
        # NOTE: the repository row is expected to exist already; fetchone()
        # returns None otherwise and the subscript below fails.
        self.repo_id = cursor.fetchone()[0]

        # Last commit stored for this repository, if any.
        last_rev = last_commit = None
        query = """SELECT rev, id from scmlog
                where id = (select max(id) from scmlog
                            where repository_id = ?)"""
        cursor.execute(statement(query, self.db.place_holder),
                       (self.repo_id, ))
        rs = cursor.fetchone()
        if rs is not None:
            last_rev, last_commit = rs

        # One cache file per repository, named after the flattened URI.
        filename = uri.replace('/', '_')
        self.cache_file = os.path.join(cvsanaly_cache_dir(), filename)

        # if there's a previous cache file, just use it
        if os.path.isfile(self.cache_file):
            self.__load_caches_from_disk()

            if last_rev is not None:
                try:
                    commit_id = self.revision_cache[last_rev]
                except KeyError:
                    msg = "".join([
                        "Cache file %s is not up to date or it's corrupt: " %
                        (self.cache_file, ),
                        "Revision %s was not found in the cache file. " %
                        (last_rev, ),
                        "It's not possible to continue, the cache ",
                        "file should be removed and the database cleaned up"])
                    raise CacheFileMismatch(msg)
                if commit_id != last_commit:
                    # Cache and db don't match
                    msg = "".join([
                        "Cache file %s is not up to date or it's corrupt: " %
                        (self.cache_file, ),
                        "Commit id mismatch for revision %s " % (last_rev, ),
                        "(File Cache:%d, Database: %d). " %
                        (commit_id, last_commit),
                        "It's not possible to continue, the cache ",
                        "file should be removed and the database cleaned up"])
                    raise CacheFileMismatch(msg)
            else:
                # Database looks empty (or corrupt) and we have
                # a cache file. We can just remove it and continue
                # normally
                self.__init_caches()
                os.remove(self.cache_file)
                printout("Database looks empty, removing cache file %s",
                         (self.cache_file, ))
        elif last_rev is not None:
            # There are data in the database,
            # but we don't have a cache file!!!
            msg = "".join([
                "Cache file %s is not up to date or it's corrupt: " %
                (self.cache_file, ),
                "Cache file cannot be found. ",
                "It's not possible to continue, the database ",
                "should be cleaned up"])
            raise CacheFileMismatch(msg)

    def __insert_many(self):
        """Flush the queued actions and commits to the database and commit.

        Does nothing when both queues are empty. On MySQL, actions that
        already exist (duplicate-entry error 1062) are skipped; any other
        integrity error is propagated to the caller.
        """
        if not self.actions and not self.commits:
            return

        cursor = self.cursor

        if self.actions:
            profiler_start("Inserting actions for repository %d",
                           (self.repo_id, ))
            # Loop-invariant work is done once, outside the loop.
            insert_action = statement(DBAction.__insert__,
                                      self.db.place_holder)
            is_mysql = isinstance(self.db, MysqlDatabase)
            if is_mysql:
                # Imported lazily so other backends do not need MySQLdb.
                import MySQLdb

            for a in self.actions:
                action_tuple = (a.id, a.type, a.file_id, a.commit_id,
                                a.branch_id, a.current_file_path)
                if not is_mysql:
                    cursor.execute(insert_action, action_tuple)
                    continue
                try:
                    cursor.execute(insert_action, action_tuple)
                except MySQLdb.IntegrityError as e:
                    # 1062 is MySQL's duplicate-entry error: the action is
                    # already stored, so it is safe to skip. Anything else
                    # is a real integrity problem and must not be hidden.
                    if e.args[0] != 1062:
                        raise
            self.actions = []
            profiler_stop("Inserting actions for repository %d",
                          (self.repo_id, ))
        if self.commits:
            commits = [(c.id, c.rev, c.committer, c.author, c.commit_date,
                        c.author_date, to_utf8(c.message).decode("utf-8"),
                        c.composed_rev, c.repository_id)
                       for c in self.commits]
            profiler_start("Inserting commits for repository %d",
                           (self.repo_id, ))
            cursor.executemany(
                statement(DBLog.__insert__, self.db.place_holder), commits)
            self.commits = []
            profiler_stop("Inserting commits for repository %d",
                          (self.repo_id, ))

        profiler_start("Committing inserts for repository %d",
                       (self.repo_id, ))
        self.cnn.commit()
        profiler_stop("Committing inserts for repository %d", (self.repo_id, ))
Beispiel #28
0
    from cStringIO import StringIO
    from cPickle import dump, load
    from Database import create_database, DBRepository, ICursor

    uri = "http://svn.test-cvsanaly.org/svn/test"
    
    db = create_database ('mysql', 'dbcontenthandler', sys.argv[1], None, 'localhost')
    cnn = db.connect ()

    tables = ['actions', 'branches', 'file_copies', 'file_links', 'files',
              'people', 'repositories', 'scmlog', 'tag_revisions', 'tags']
    
    cursor = cnn.cursor ()
    for table in tables:
        query = "delete from %s" % (table)
        cursor.execute (statement (query, db.place_holder))
    cursor.close ()
    cnn.commit ()

    name = uri.rstrip ("/").split ("/")[-1].strip ()
    cursor = cnn.cursor ()
    rep = DBRepository (None, uri, name, 'svn')
    cursor.execute (statement (DBRepository.__insert__, db.place_holder), (rep.id, rep.uri, rep.name, rep.type))
    cursor.close ()
    cnn.commit ()

    ch = DBContentHandler (db)
    ch.begin ()
    ch.repository (uri)

    # We need to split the query to save memory