Example 1
    def run(self, repo, uri, db):
        profiler_start("Running Blame extension")

        self.db = db

        cnn = self.db.connect()
        read_cursor = cnn.cursor()
        write_cursor = cnn.cursor()

        blames = []

        try:
            path = uri_to_filename(uri)
            if path is not None:
                repo_uri = repo.get_uri_for_path(path)
            else:
                repo_uri = uri

            read_cursor.execute(statement("SELECT id from repositories " + \
                                          "where uri = ?", db.place_holder), 
                                          (repo_uri,))
            repoid = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError("Blame extension is not supported for " + \
                                    "%s repositories" % (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError("Error creating repository %s. " + \
                                    "Exception: %s" % (repo.get_uri(), str(e)))
Example 2
    def run(self, repo, uri, db):            
        # Start the profiler, as in every other extension
        profiler_start("Running FileCount extension")
        
        # Open a connection to the database and get cursors
        self.db = db
        connection = self.db.connect()
        read_cursor = connection.cursor()
        write_cursor = connection.cursor()
        
        # Try to get the repository and get its ID from the database
        try:
            path = uri_to_filename(uri)
            if path is not None:
                repo_uri = repo.get_uri_for_path(path)
            else:
                repo_uri = uri

            read_cursor.execute(statement( \
                    "SELECT id from repositories where uri = ?", \
                    db.place_holder), (repo_uri,))
            repo_id = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError( \
                    "FileCount extension is not supported for %s repos" % \
                    (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError( \
                    "Error creating repository %s. Exception: %s" % \
                    (repo.get_uri(), str(e)))
Example 3
    def run(self, repo, uri, db):
        def patch_generator(repo, repo_uri, repo_id, db, cursor):
            icursor = ICursor(cursor, self.INTERVAL_SIZE)
            icursor.execute(
                statement("SELECT id, rev, composed_rev " + "from scmlog where repository_id = ?", db.place_holder),
                (repo_id,),
            )

            rs = icursor.fetchmany()

            while rs:
                for commit_id, revision, composed_rev in rs:
                    # Get the patch
                    pj = PatchJob(revision, commit_id)

                    path = uri_to_filename(repo_uri)
                    pj.run(repo, path or repo.get_uri())

                    # Yield the patch to hunks
                    yield (pj.commit_id, pj.data, pj.rev)

                rs = icursor.fetchmany()

        profiler_start("Running PatchesAndHunks extension")

        hunks = Hunks()
        hunks.get_patches = patch_generator
        hunks.run(repo, uri, db)
Example 4
    def run(self, repo, uri, db):
        def patch_generator(repo, repo_uri, repo_id, db, cursor):
            icursor = ICursor(cursor, self.INTERVAL_SIZE)
            icursor.execute(statement("SELECT id, rev, composed_rev " + \
                                      "from scmlog where repository_id = ?",
                                      db.place_holder), (repo_id,))

            rs = icursor.fetchmany()

            while rs:
                for commit_id, revision, composed_rev in rs:
                    # Get the patch
                    pj = PatchJob(revision, commit_id)

                    path = uri_to_filename(repo_uri)
                    pj.run(repo, path or repo.get_uri())

                    p = DBPatch(db, commit_id, pj.data)
                    # Yield the patch to hunks
                    for file_id, patch in p.file_patches():
                        yield (pj.commit_id, file_id, patch, pj.rev)

                rs = icursor.fetchmany()


        profiler_start("Running PatchesAndHunks extension")

        hunks = Hunks()
        hunks.get_patches = patch_generator
        hunks.run(repo, uri, db)
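Examples 3 and 4 reuse the Hunks extension by assigning a custom generator to its get_patches attribute. Because the function is set on the instance rather than the class, Python does not bind it and no self is passed, which is why patch_generator takes no self parameter. A minimal, self-contained sketch of the pattern (the class body and the yielded tuple are illustrative only):

    class Hunks(object):
        def get_patches(self, repo, repo_uri, repo_id, db, cursor):
            raise NotImplementedError

        def run(self, repo, uri, db):
            # The real extension resolves repo_id and opens cursors first
            for patch in self.get_patches(repo, uri, 1, db, None):
                print(patch)

    def patch_generator(repo, repo_uri, repo_id, db, cursor):
        yield ("commit-id", "file-id", "patch text", "rev")

    hunks = Hunks()
    hunks.get_patches = patch_generator  # instance attribute: no self passed
    hunks.run(repo=None, uri=None, db=None)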
Example 5
 def get_path_from_database(self, file_id, commit_id):
     """Returns the last valid path for a given file_id at commit_id
        (May have been removed afterwords!)"""
     
     if config.debug:
         profiler_start("Getting full file path for file_id %d and commit_id %d",
                        (file_id, commit_id))
     
     db = self.__dict__['db']
     cnn = db.connect()
     
     cursor = cnn.cursor()
     query = """SELECT current_file_path from actions
                WHERE file_id=? AND commit_id <= ?
                ORDER BY commit_id DESC LIMIT 1"""
     cursor.execute(statement(query, db.place_holder), (file_id, commit_id))
     row = cursor.fetchone()
     file_path = row[0] if row else None
     
     cursor.close()
     cnn.close()
     
     printdbg("get_path_from_database:\
               Path for file_id %d at commit_id %d: %s",
              (file_id, commit_id, file_path))
     if config.debug:
         profiler_stop("Getting full file path for file_id %d and commit_id %d",
                       (file_id, commit_id), True)
     return file_path
Example 6
 def get_path_from_database(self, file_id, commit_id):
     """Returns the last valid path for a given file_id at commit_id
        (May have been removed afterwords!)"""
     
     if config.debug:
         profiler_start("Getting full file path for file_id %d and commit_id %d",
                        (file_id, commit_id))
     
     db = self.__dict__['db']
     cnn = db.connect()
     
     cursor = cnn.cursor()
     query = """SELECT file_path from file_paths
                WHERE file_id=? AND commit_id <= ?
                ORDER BY commit_id DESC LIMIT 1"""
     cursor.execute(statement(query, db.place_holder), (file_id, commit_id))
     row = cursor.fetchone()
     file_path = row[0] if row else None
     
     cursor.close()
     cnn.close()
     
     printdbg("get_path_from_database:\
               Path for file_id %d at commit_id %d: %s",
              (file_id, commit_id, file_path))
     if config.debug:
         profiler_stop("Getting full file path for file_id %d and commit_id %d",
                       (file_id, commit_id), True)
     return file_path
Example 7
 def get_file_id(self, file_path, commit_id):
     """Ask for the file_id for a given file_path and commit_id"""
     
     if config.debug:
         profiler_start("Getting file id for file_path %s and commit_id %d",
                         (file_path, commit_id))
     
     db = self.__dict__['db']
     cnn = db.connect()
     cursor = cnn.cursor()
     query = """SELECT file_id from actions
                WHERE binary current_file_path = ? AND commit_id = ?
                ORDER BY commit_id DESC LIMIT 1"""
     cursor.execute(statement(query, db.place_holder),
                     (file_path, commit_id))
     row = cursor.fetchone()
     file_id = row[0] if row else None
     
     cursor.close()
     cnn.close()
     
     if config.debug:
         profiler_stop("Getting file id for file_path %s and commit_id %d",
                        (file_path, commit_id), True)
     
     return file_id
Example 8
    def run (self, repo, repo_uri):
        profiler_start("Running BlameJob for %s@%s", (self.path,self.rev))
        def blame_line (line, p):
            p.feed (line)

        repo_type = repo.get_type ()
        if repo_type == 'cvs':
            # CVS paths contain the module stuff
            uri = repo.get_uri_for_path (repo_uri)
            module = uri[len (repo.get_uri ()):].strip ('/')

            if module != '.':
                path = self.path[len (module):].strip ('/')
            else:
                path = self.path.strip ('/')
        else:
            path = self.path.strip ('/')

        filename = os.path.basename (self.path)
        p = create_parser (repo.get_type (), self.path)
        out = self.get_content_handler()
        p.set_output_device (out)
        wid = repo.add_watch (BLAME, blame_line, p)
        try:
            repo.blame (os.path.join (repo_uri, path), self.rev)
            self.collect_results(out)
        except RepositoryCommandError, e:
            self.failed = True
            printerr ("Command %s returned %d (%s)", (e.cmd, e.returncode, e.error))
Example 9
    def run(self, repo, uri, db):
        profiler_start("Running HunkBlame extension")

        self.db = db

        cnn = self.db.connect()
        read_cursor = cnn.cursor()
        write_cursor = cnn.cursor()
        try:
            path = uri_to_filename(uri)
            if path is not None:
                repo_uri = repo.get_uri_for_path(path)
            else:
                repo_uri = uri

            read_cursor.execute(
                statement("SELECT id from repositories where uri = ?",
                          db.place_holder), (repo_uri, ))
            repoid = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError(
                "HunkBlame extension is not supported for %s repositories" %
                (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError(
                "Error creating repository %s. Exception: %s" %
                (repo.get_uri(), str(e)))
Example 10
 def get_file_id(self, file_path, commit_id):
     """Ask for the file_id for a given file_path and commit_id"""
     
     if config.debug:
         profiler_start("Getting file id for file_path %s and commit_id %d",
                         (file_path, commit_id))
     
     db = self.__dict__['db']
     cnn = db.connect()
     cursor = cnn.cursor()
     query = """SELECT file_id from file_paths
                WHERE file_path = ? AND commit_id <= ?
                ORDER BY commit_id DESC LIMIT 1"""
     cursor.execute(statement(query, db.place_holder),
                     (file_path, commit_id))
     row = cursor.fetchone()
     file_id = row[0] if row else None
     
     cursor.close()
     cnn.close()
     
     if config.debug:
         profiler_stop("Getting file id for file_path %s and commit_id %d",
                        (file_path, commit_id), True)
     
     return file_id
Example 11
    def run(self, repo, uri, db):
        profiler_start("Running PatchLOC extension")

        # Open a connection to the database and get cursors
        self.db = db
        connection = self.db.connect()
        cursor = connection.cursor()

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        cursor.execute(
            statement("SELECT id from repositories where uri = ?",
                      db.place_holder), (repo_uri, ))
        repo_id = cursor.fetchone()[0]

        try:
            self.__create_table(connection)
        except TableAlreadyExists:
            pass
        except Exception, e:
            raise ExtensionRunError(str(e))
Example 12
 def line(self, blame_line):
     if not self.profiled:
         profiler_start("Processing blame output for %s", (self.filename))
         self.profiled = True
     for hunk_id, start_line, end_line in self.hunks:
         if blame_line.line >= start_line and blame_line.line <= end_line:
             if self.bug_revs.get(hunk_id) is None:
                 self.bug_revs[hunk_id] = set()
             self.bug_revs[hunk_id].add(blame_line.rev)
             break
Example 13
    def get_path(self, file_id, commit_id, repo_id):
        profiler_start("Getting path for file %d at commit %d", (file_id, commit_id))

        adj = self.__dict__['adj']
        assert adj is not None, "Matrix no updated"

        path = self.__build_path(file_id, adj)

        profiler_stop("Getting path for file %d at commit %d", (file_id, commit_id), True)
        return path
Example 14
 def line(self, blame_line):
     if not self.profiled:
         profiler_start("Processing blame output for %s",
                        (self.filename))
         self.profiled = True
     for hunk_id, start_line, end_line in self.hunks:
         if blame_line.line >= start_line and blame_line.line <= end_line:
             if self.bug_revs.get(hunk_id) is None:
                 self.bug_revs[hunk_id] = set()
             self.bug_revs[hunk_id].add(blame_line.rev)
             break
Example 15
    def update_all(self, repo_id):
        """
        update_all enable cache for adjacency matrices
        Pros: File paths in different revisions can be
        accessed randomly, i.e. after calling update_all,
        get_path can be called with any revision in any
        order.
        Cons: It consumes significant memory to store
        the adjacency matrices

        If the config has low_memory set to true, shelve will
        be used instead, to write the cache out to disk.
        """
        profiler_start("Update all file paths")
        
        if Config().low_memory:
            self.shelve_file_name = str(time()) + "-shelve.db"
            
            # If there is an old file, shelf will complain viciously
            if os.path.exists(self.shelve_file_name):
                os.remove(self.shelve_file_name)
            
            self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name, 
                                                        writeback=False)
        
        db = self.__dict__['db']
        cnn = db.connect()

        cursor = cnn.cursor()
        query = """select distinct(s.id) from scmlog s, actions a
                    where s.id = a.commit_id and repository_id=?
                    order by s.commit_date"""
        cursor.execute(statement(query, db.place_holder), (repo_id,))
        
        old_id = -1
        all_commits = [i[0] for i in cursor.fetchall()]
        for id in all_commits:
            if old_id != id:
                adj = self.__dict__['cached_adj'].get(str(id))

                if adj is None:
                    self.update_for_revision(cursor, id, repo_id)
                    self.__dict__['cached_adj'][str(id)] = \
                    deepcopy(self.__dict__['adj'])
                old_id = id
        cursor.close()
        cnn.close()
        profiler_stop("Update all file paths", delete=True)
Example 16
    def update_all(self, repo_id):
        """
        update_all enable cache for adjacency matrices
        Pros: File paths in different revisions can be
        accessed randomly, i.e. after calling update_all,
        get_path can be called with any revision in any
        order.
        Cons: It consumes significant memory to store
        the adjacency matrices

        If the config has low_memory set to true, shelve will
        be used instead, to write the cache out to disk.
        """
        profiler_start("Update all file paths")
        
        if Config().low_memory:
            self.shelve_file_name = str(time()) + "-shelve.db"
            
            # If there is an old file, shelf will complain viciously
            if os.path.exists(self.shelve_file_name):
                os.remove(self.shelve_file_name)
            
            self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name, 
                                                        writeback=False)
        
        db = self.__dict__['db']
        cnn = db.connect()

        cursor = cnn.cursor()
        query = """select distinct(s.id) from scmlog s, actions a
                    where s.id = a.commit_id and repository_id=?
                    order by s.date"""
        cursor.execute(statement(query, db.place_holder), (repo_id,))
        
        old_id = -1
        all_commits = [i[0] for i in cursor.fetchall()]
        for id in all_commits:
            if old_id != id:
                adj = self.__dict__['cached_adj'].get(str(id))

                if adj is None:
                    self.update_for_revision(cursor, id, repo_id)
                    self.__dict__['cached_adj'][str(id)] = \
                    deepcopy(self.__dict__['adj'])
                old_id = id
        cursor.close()
        cnn.close()
        profiler_stop("Update all file paths", delete=True)
Example 17
    def __build_path(self, file_id, adj):
        if file_id not in adj.adj:
            return None

        profiler_start("Building path for file %d", (file_id,))

        tokens = []
        id = file_id

        while id != -1:
            tokens.insert(0, adj.files[id])
            id = adj.adj.get(id, -1)

        profiler_stop("Building path for file %d", (file_id,), True)

        return "/" + "/".join(tokens)
Example 18
    def __build_path(self, file_id, adj):
        if file_id not in adj.adj:
            return None

        profiler_start("Building path for file %d", (file_id,))

        tokens = []
        id = file_id

        while id != -1:
            tokens.insert(0, adj.files[id])
            id = adj.adj[id]

        profiler_stop("Building path for file %d", (file_id,), True)

        return "/" + "/".join(tokens)
Example 19
    def __build_path(self, file_id, adj):
        if file_id not in adj.adj:
            return None

        profiler_start("Building path for file %d", (file_id,))
        
        tokens = []
        id = file_id
        
        while id is not None and id != -1:
            tokens.insert(0, adj.files[id])
            #use get instead of index to avoid key error
            id = adj.adj.get(id) 

        profiler_stop("Building path for file %d", (file_id,), True)

        return "/" + "/".join(tokens)
Example 20
    def get_patches(self, repo, repo_uri, repo_id, db, cursor):
        profiler_start("Hunks: fetch all patches")
        icursor = ICursor(cursor, self.INTERVAL_SIZE)
        # Get the patches from this repository
        query = """select p.commit_id, p.file_id, p.patch, s.rev
                    from patches p, scmlog s
                    where p.commit_id = s.id and
                    s.repository_id = ? and
                    p.patch is not NULL"""
        icursor.execute(statement(query, db.place_holder), (repo_id, ))
        profiler_stop("Hunks: fetch all patches", delete=True)

        rs = icursor.fetchmany()

        while rs:
            for commit_id, file_id, patch_content, rev in rs:
                yield (commit_id, file_id, to_utf8(patch_content), rev)

            rs = icursor.fetchmany()
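The fetchmany loop above is the standard DB-API batching idiom; ICursor evidently wraps a cursor so that each fetchmany call returns at most INTERVAL_SIZE rows. The same consumption pattern with a plain sqlite3 cursor and a throwaway table, for illustration only:

    import sqlite3

    cnn = sqlite3.connect(":memory:")
    cnn.execute("CREATE TABLE patches (commit_id INTEGER, patch TEXT)")
    cnn.executemany("INSERT INTO patches VALUES (?, ?)",
                    [(i, "patch-%d" % i) for i in range(10)])

    cursor = cnn.cursor()
    cursor.arraysize = 4  # batch size, analogous to INTERVAL_SIZE
    cursor.execute("SELECT commit_id, patch FROM patches")

    rs = cursor.fetchmany()  # same loop shape as get_patches above
    while rs:
        for commit_id, patch in rs:
            print("%s %s" % (commit_id, patch))
        rs = cursor.fetchmany()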
Example 21
    def update_all(self, repo_id):
        profiler_start("Update all file paths")
        db = self.__dict__['db']
        cnn = db.connect ()

        cursor = cnn.cursor ()
        query = """select distinct(s.id) from scmlog s, actions a
                    where s.id = a.commit_id and repository_id=?
                    order by s.id"""
        cursor.execute (statement (query, db.place_holder), (repo_id,))        
        old_id = -1
        all_commits = [i[0] for i in cursor.fetchall ()]
        for id in all_commits:
            if old_id != id:
                self.update_for_revision (cursor, id, repo_id)
                old_id = id
        cursor.close()
        cnn.close()
        profiler_stop("Update all file paths", delete=True)
Example 22
    def update_all(self, repo_id):
        profiler_start("Update all file paths")
        db = self.__dict__['db']
        cnn = db.connect()

        cursor = cnn.cursor()
        query = """select distinct(s.id) from scmlog s, actions a
                    where s.id = a.commit_id and repository_id=?
                    order by s.id"""
        cursor.execute(statement(query, db.place_holder), (repo_id, ))
        old_id = -1
        all_commits = [i[0] for i in cursor.fetchall()]
        for id in all_commits:
            if old_id != id:
                self.update_for_revision(cursor, id, repo_id)
                old_id = id
        cursor.close()
        cnn.close()
        profiler_stop("Update all file paths", delete=True)
Example 23
    def get_patches(self, repo, repo_uri, repo_id, db, cursor):
        profiler_start("Hunks: fetch all patches")
        icursor = ICursor(cursor, self.INTERVAL_SIZE)
        # Get the patches from this repository
        query = """select p.commit_id, p.file_id, p.patch, s.rev
                    from patches p, scmlog s
                    where p.commit_id = s.id and
                    s.repository_id = ? and
                    p.patch is not NULL"""
        icursor.execute(statement(query, db.place_holder), (repo_id,))
        profiler_stop("Hunks: fetch all patches", delete=True)

        rs = icursor.fetchmany()

        while rs:
            for commit_id, file_id, patch_content, rev in rs:
                yield (commit_id, file_id, to_utf8(patch_content), rev)
            
            rs = icursor.fetchmany()
Example 24
    def run(self, repo, uri, db):
        profiler_start("Running Patches extension")
        self.db = db
        self.repo = repo

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        self.repo_uri = path or repo.get_uri()

        cnn = self.db.connect()

        cursor = cnn.cursor()
        cursor.execute(statement("SELECT id from repositories where uri = ?",
                                 db.place_holder), (repo_uri,))
        repo_id = cursor.fetchone()[0]

        # If the table does not exist, the list of commits is empty;
        # otherwise it is filled in the except block below
        commits = []

        try:
            printdbg("Creating patches table")
            self.__create_table(cnn)
        except TableAlreadyExists:
            printdbg("Patches table exists already, getting max ID")
            cursor.execute(statement("SELECT max(id) from patches",
                                     db.place_holder))
            id = cursor.fetchone()[0]
            if id is not None:
                DBPatch.id_counter = id + 1

            commits = self.__get_patches_for_repository(repo_id, cursor)
        except Exception, e:
            raise ExtensionRunError(str(e))
Example 25
    def run(self, repo, uri, db):
        # Start the profiler, as in every other extension
        profiler_start("Running BugFixMessage extension")

        # Open a connection to the database and get cursors
        self.db = db
        connection = self.db.connect()
        read_cursor = connection.cursor()
        write_cursor = connection.cursor()

        # Try to get the repository and get its ID from the database
        try:
            repo_uri = get_repo_uri(uri, repo)
            repo_id = get_repo_id(repo_uri, read_cursor, db)

        except NotImplementedError:
            raise ExtensionRunError( \
                    "BugFixMessage extension is not supported for %s repos" % \
                    (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError( \
                    "Error creating repository %s. Exception: %s" % \
                    (repo.get_uri(), str(e)))
Example 26
    def run(self, repo, uri, db):
        profiler_start("Running PatchLOC extension")

        # Open a connection to the database and get cursors
        self.db = db
        connection = self.db.connect()
        cursor = connection.cursor()

        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
        else:
            repo_uri = uri

        cursor.execute(statement("SELECT id from repositories where uri = ?",
                                 db.place_holder), (repo_uri,))
        repo_id = cursor.fetchone()[0]

        try:
            self.__create_table(connection)
        except TableAlreadyExists:
            pass
        except Exception, e:
            raise ExtensionRunError(str(e))
Example 27
            i = i + 1
            if i >= queuesize:
                printdbg("Content queue is now at %d, flushing to database",
                         (i, ))

                processed_jobs = self.__process_finished_jobs(
                    job_pool, write_cursor, db)
                connection.commit()
                i = i - processed_jobs
                if processed_jobs < (queuesize / 5):
                    job_pool.join()

        job_pool.join()
        self.__process_finished_jobs(job_pool, write_cursor, db)

        profiler_start("Inserting results in db")
        #self.__insert_many(write_cursor)
        connection.commit()
        profiler_stop("Inserting results in db")

        read_cursor.close()
        write_cursor.close()
        connection.close()

        # This turns off the profiler and deletes its timings
        profiler_stop("Running content extension", delete=True)

    def backout(self, repo, uri, db):
        update_statement = """delete from content where
                              commit_id in (select id from scmlog s
                                            where s.repository_id = ?)"""
Example 28
                    pass
                finally:
                    inner_cursor.close()

                hunks = [h for h in hunks if h[0] not in blames]
                job = HunkBlameJob(hunks, relative_path, pre_rev)

                job_pool.push(job)
                n_blames += 1

                if n_blames >= self.MAX_BLAMES:
                    processed_jobs = self.process_finished_jobs(
                        job_pool, write_cursor)
                    n_blames -= processed_jobs
                    if processed_jobs <= self.MAX_BLAMES / 5:
                        profiler_start("Joining unprocessed jobs")
                        job_pool.join()
                        profiler_stop("Joining unprocessed jobs", delete=True)
            except NotValidHunkWarning as e:
                printerr("Not a valid hunk: " + str(e))
            finally:
                file_rev = read_cursor.fetchone()

        job_pool.join()
        self.process_finished_jobs(job_pool, write_cursor, True)

        try:
            self.__drop_cache(cnn)
        except Exception as e:
            printdbg("Couldn't drop cache because of " + str(e))
Example 29
    def get_commit_data(self, patch_content):
        profiler_start("get_commit_data")
        lines = [l + "\n" for l in patch_content.splitlines() if l]
        hunks = []

        for patch in [p for p in parse_patches(lines, allow_dirty=True, \
                            allow_continue=True) if isinstance(p, Patch)]:
            # This method matches that of parseLine in UnifiedDiffParser.java.
            # It's not necessarily intuitive, but this algorithm is much harder
            # than it looks; I spent hours trying to find a simpler solution.
            # It does, however, seem to work, which is pretty amazing once you
            # consider how difficult the problem is.
            # The trick this method uses is that each *part* of a hunk,
            # i.e. added, deleted or changed, is treated as a *new entity*.
            # The EntityDelta table does not store just diffs, it stores
            # each part of a diff.
            # I will need to copy the behavior of how Sep inserts a CommitData
            # into the database to ensure things match
            for hunk in patch.hunks:
                old_start_line = hunk.orig_pos - 1
                new_start_line = hunk.mod_pos - 1

                old_end_line = 0
                new_end_line = 0

                added = False
                deleted = False
                in_change = False

                for line in hunk.lines:
                    if isinstance(line, RemoveLine):
                        if not in_change or not deleted:
                            in_change = True
                            old_start_line += 1
                            old_end_line = old_start_line
                        else:
                            old_end_line += 1

                        deleted = True

                    elif isinstance(line, InsertLine):
                        if not in_change or not added:
                            in_change = True
                            new_start_line += 1
                            new_end_line = new_start_line
                        else:
                            new_end_line += 1

                        added = True

                    elif isinstance(line, ContextLine):
                        if in_change:
                            in_change = False
                            printdbg("Patch new name: " + patch.newname)
                            file_name = re.split(r'\s+', patch.newname)[0]
                            if file_name == "/dev/null":
                                file_name = re.split(r'\s+', patch.oldname)[0]
                            cd = CommitData(file_name)

                            if deleted:
                                cd.old_start_line = old_start_line
                                cd.old_end_line = old_end_line
                                old_start_line = old_end_line

                            if added:
                                cd.new_start_line = new_start_line
                                cd.new_end_line = new_end_line
                                new_start_line = new_end_line

                            hunks.append(cd)
                            added = deleted = False

                        old_start_line += 1
                        new_start_line += 1

                # The diff ended without a new context line
                if in_change:
                    cd = CommitData(re.split(r'\s+', patch.newname)[0])

                    if deleted:
                        cd.old_start_line = old_start_line
                        cd.old_end_line = old_end_line

                    if added:
                        cd.new_start_line = new_start_line
                        cd.new_end_line = new_end_line

                    hunks.append(cd)
        profiler_stop("get_commit_data")
        return hunks
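To see what this extraction produces, one can feed the method a small unified diff and inspect the per-part line ranges. A hypothetical driver, where ext stands for an instance of the class above (the getattr defaults cover parts that were only added or only deleted):

    sample_patch = ("--- a/foo.py\n"
                    "+++ b/foo.py\n"
                    "@@ -1,3 +1,3 @@\n"
                    " unchanged\n"
                    "-old line\n"
                    "+new line\n"
                    " unchanged\n")

    for cd in ext.get_commit_data(sample_patch):
        # Each CommitData describes one added/deleted/changed part of a hunk;
        # for this diff it should print old: 2-2 new: 2-2 (the changed line)
        print("old: %s-%s new: %s-%s" % (
            getattr(cd, "old_start_line", None),
            getattr(cd, "old_end_line", None),
            getattr(cd, "new_start_line", None),
            getattr(cd, "new_end_line", None)))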
Example 30
                repo_uri = uri

            read_cursor.execute(statement( \
                    "SELECT id from repositories where uri = ?", \
                    db.place_holder), (repo_uri,))
            repo_id = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError( \
                    "Content extension is not supported for %s repos" % \
                    (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError( \
                    "Error creating repository %s. Exception: %s" % \
                    (repo.get_uri(), str(e)))

        profiler_start("Hunks: fetch all patches")
        icursor = ICursor(read_cursor, self.INTERVAL_SIZE)
        # Get the patches from this repository
        query = """select p.commit_id, p.patch, s.rev 
                    from patches p, scmlog s 
                    where p.commit_id = s.id and
                    s.repository_id = ? and 
                    p.patch is not NULL"""
        icursor.execute(statement(query, db.place_holder), (repo_id, ))
        profiler_stop("Hunks: fetch all patches", delete=True)

        self.__prepare_table(connection)
        fp = FilePaths(db)
        rs = icursor.fetchmany()

        while rs:
Example 31
                except Exception as e:
                    pass
                finally:
                    inner_cursor.close()
                    
                hunks = [h for h in hunks if h[0] not in blames]
                job = HunkBlameJob(hunks, relative_path, pre_rev)
                
                job_pool.push (job)
                n_blames += 1
        
                if n_blames >= self.MAX_BLAMES:
                    processed_jobs = self.process_finished_jobs (job_pool, write_cursor)
                    n_blames -= processed_jobs
                    if processed_jobs <= self.MAX_BLAMES / 5:
                        profiler_start("Joining unprocessed jobs")
                        job_pool.join()
                        profiler_stop("Joining unprocessed jobs", delete=True)
            except NotValidHunkWarning as e:
                printerr("Not a valid hunk: "+str(e))
            finally:
                file_rev = read_cursor.fetchone()

        job_pool.join ()
        self.process_finished_jobs (job_pool, write_cursor, True)

        try:
            self.__drop_cache(cnn)
        except Exception as e:
            printdbg("Couldn't drop cache because of " + str(e))
Example 32
    def update_for_revision(self, cursor, commit_id, repo_id):
        db = self.__dict__['db']

        if commit_id == self.__dict__['rev']:
            return
        prev_commit_id = self.__dict__['rev']
        self.__dict__['rev'] = commit_id

        profiler_start("Updating adjacency matrix for commit %d", (commit_id,))
        if self.__dict__['adj'] is None:
            adj = Adj()
            self.__dict__['adj'] = adj
        else:
            adj = self.__dict__['adj']

        rf = self.__dict__['files']
        if rf is not None:
            repo_files_id, repo_files = rf
            if repo_files_id != repo_id:
                del self.__dict__['files']
                repo_files = {}
        else:
            repo_files = {}

        if not repo_files:
            # Get and cache all the files table
            query = "select id, file_name from files where repository_id = ?"
            # profiler_start("Getting files for repository %d", (repo_id,))
            cursor.execute(statement(query, db.place_holder), (repo_id,))
            # profiler_stop("Getting files for repository %d", (repo_id,), 
            # True)
            rs = cursor.fetchmany()
            while rs:
                for id, file_name in rs:
                    repo_files[id] = file_name
                rs = cursor.fetchmany()
            self.__dict__['files'] = (repo_id, repo_files)
            adj.files = repo_files

        # Get the files that have been renamed
        # with the new name for the given rev
        query = "select af.file_id, af.new_file_name " + \
                "from actions_file_names af, files f " + \
                "where af.file_id = f.id " + \
                "and af.commit_id = ? " + \
                "and af.type = 'V' " + \
                "and f.repository_id = ?"
        # profiler_start("Getting new file names for commit %d", (commit_id,))
        cursor.execute(statement(query, db.place_holder), (commit_id, repo_id))
        # profiler_stop("Getting new file names for commit %d", (commit_id,), 
        # True)
        rs = cursor.fetchmany()
        while rs:
            for id, file_name in rs:
                adj.files[id] = file_name
            rs = cursor.fetchmany()

        # Get the new file links since the last time
        query = "select fl.parent_id, fl.file_id " + \
                "from file_links fl, files f " + \
                "where fl.file_id = f.id "
        if prev_commit_id is None:
            query += "and fl.commit_id = ? "
            args = (commit_id, repo_id)
        else:
            query += "and fl.commit_id between ? and ? "
            args = (prev_commit_id, commit_id, repo_id)
        query += "and f.repository_id = ?"
#        profiler_start("Getting file links for commit %d", (commit_id,))
        cursor.execute(statement(query, db.place_holder), args)
#        profiler_stop("Getting file links for commit %d", (commit_id,), True)
        rs = cursor.fetchmany()
        while rs:
            for f1, f2 in rs:
                adj.adj[f2] = f1
            rs = cursor.fetchmany()

        profiler_stop("Updating adjacency matrix for commit %d",
                       (commit_id,), True)
Example 33
    def run(self, repo, uri, db):
        # Record how many patches contain a different file name
        function_name_change_count = 0
        # Only valid for the author's machine; adjust to your own setup
        prefix = r'/home/moqi/Downloads/voldemort'
        # Old file names
        f_of_old = open('/home/moqi/Downloads/voldemort/old', 'w')
        # New file names
        f_of_new = open('/home/moqi/Downloads/voldemort/new', 'w')
        # Stores the information returned by search_lines
        search_result = {}
        # Number of exceptions (e.g. /null, or a file that was deleted and
        # so cannot be opened); not accurate
        num_of_exception = 0
        # Number of files that are not source files
        non_source_file = 0
        # Number of patches whose commit_id = 1
        num_of_id1 = 0
        # Number of files that cannot be recovered
        num_of_unrecovered = 0
        # old_cla contains the class definitions in the old file
        old_cla = sets.Set()
        new_cla = sets.Set()
        old_func = sets.Set()
        new_func = sets.Set()
        # Max id in the patches table
        id_max = 0
        patch_id = 0
        file_id = 0
        # old_class, new_class, old_function, new_function
        old_class = ''
        new_class = ''
        old_function = ''
        new_function = ''

        __insert__ = """INSERT INTO analyse_patch (patch_id, commit_id, file_id, old_class, new_class, 
                    old_function, new_function, if_id1)
                    values (?, ?, ?, ?, ?, ?, ?, ?)"""
        start = time.time()
        
        profiler_start("Running analyse_patch extension")
        self.db = db
        self.repo = repo
        
        path = uri_to_filename(uri)
        if path is not None:
            repo_uri = repo.get_uri_for_path(path)
            # Added by me
            prefix = path
        else:
            repo_uri = uri

        self.repo_uri = path or repo.get_uri()

        cnn = self.db.connect()

        cursor = cnn.cursor()
        write_cursor = cnn.cursor()
        
        cursor.execute(statement("SELECT id from repositories where uri = ?",
                             db.place_holder), (repo_uri,))
        repo_id = cursor.fetchone()[0]

        try:
            printdbg("Creating analyse_patch table")
            self.__create_table(cnn)
        except TableAlreadyExists:
            pass
        except Exception, e:
            raise ExtensionRunError(str(e))
Example 34
 def run(self, repo, repo_uri):
     profiler_start("Processing patch for revision %s", (self.rev))
     self.repo = repo
     self.repo_uri = repo_uri
     self.get_patch_for_commit()
     profiler_stop("Processing patch for revision %s", (self.rev))
Example 35
                repo_uri = uri

            read_cursor.execute(statement( \
                    "SELECT id from repositories where uri = ?", \
                    db.place_holder), (repo_uri,))
            repo_id = read_cursor.fetchone()[0]
        except NotImplementedError:
            raise ExtensionRunError( \
                    "Content extension is not supported for %s repos" % \
                    (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError( \
                    "Error creating repository %s. Exception: %s" % \
                    (repo.get_uri(), str(e)))
        
        profiler_start("Hunks: fetch all patches")
        icursor = ICursor(read_cursor, self.INTERVAL_SIZE)
        # Get the patches from this repository
        query = """select p.commit_id, p.patch, s.rev 
                    from patches p, scmlog s 
                    where p.commit_id = s.id and
                    s.repository_id = ? and 
                    p.patch is not NULL"""
        icursor.execute(statement(query, db.place_holder), (repo_id,))
        profiler_stop("Hunks: fetch all patches", delete=True)

        self.__prepare_table(connection)
        fp = FilePaths(db)
        rs = icursor.fetchmany()

        while rs:
Example 36
            i = i + 1
            if i >= queuesize:
                printdbg("Content queue is now at %d, flushing to database", 
                         (i,))
                
                processed_jobs = self.__process_finished_jobs(job_pool, 
                                                              write_cursor, db)
                connection.commit()
                i = i - processed_jobs
                if processed_jobs < (queuesize / 5):
                    job_pool.join()

        job_pool.join()
        self.__process_finished_jobs(job_pool, write_cursor, db)
                
        profiler_start("Inserting results in db")
        #self.__insert_many(write_cursor)
        connection.commit()
        profiler_stop("Inserting results in db")

        read_cursor.close()
        write_cursor.close()
        connection.close()

        # This turns off the profiler and deletes its timings
        profiler_stop("Running content extension", delete=True)
        
    def backout(self, repo, uri, db):
        update_statement = """delete from content where
                              commit_id in (select id from scmlog s
                                            where s.repository_id = ?)"""