Example #1
0
    def get_patch_for_commit(self):
        """Return the patch (diff text) for ``self.rev`` as a unicode string.

        Retries the underlying ``show`` command up to three times on
        command errors.  On total failure ``self.data`` is set to None
        and None is returned.
        """
        def diff_line(data, io):
            # Watch callback: accumulate diff output into the buffer.
            io.write(data)

        io = BytesIO()
        wid = self.repo.add_watch(DIFF, diff_line, io)

        done = False
        failed = False
        retries = 3

        while not done and not failed:
            try:
                self.repo.show(self.repo_uri, self.rev)
                self.data = to_utf8(io.getvalue().strip()).decode("utf-8")
                done = True
            except (CommandError, CommandRunningError) as e:
                if retries > 0:
                    printerr("Error running show command: %s, trying again",
                             (str(e),))
                    retries -= 1
                    # Bug fix: discard partial output from the failed
                    # attempt.  seek(0) alone would leave stale bytes
                    # behind if the retry produced less output.
                    io.seek(0)
                    io.truncate(0)
                else:
                    failed = True
                    printerr("Error running show command: %s, FAILED",
                             (str(e),))
                    self.data = None

        self.repo.remove_watch(DIFF, wid)

        return self.data
Example #2
0
def get_line_types(repo, repo_uri, rev, path):
    """Classify each line of a file revision.

    Returns a list with one entry per line, each labelled 'code',
    'comment' or 'empty', or None when the file content could not be
    fetched.
    """
    # Concat repo_uri and file_path for the full path of the file.
    uri = os.path.join(repo_uri, path)
    file_content = _get_file_content(repo, uri, rev)

    if file_content is None or file_content == '':
        printerr("[get_line_types] Error: No file content for " + str(rev) + ":" + str(path) + " found! Skipping.")
        line_types = None
    else:
        try:
            lexer = get_lexer_for_filename(path)
        except ClassNotFound:
            try:
                # Bug fix: added the missing space after "for" in the
                # debug message ("for" + rev ran the words together).
                printdbg("[get_line_types] Guessing lexer for " + str(rev) + ":" + str(path) + ".")
                lexer = guess_lexer(file_content)
            except ClassNotFound:
                printdbg("[get_line_types] No guess or lexer found for " + str(rev) + ":" + str(path) + ". Using TextLexer instead.")
                lexer = TextLexer()

        if isinstance(lexer, NemerleLexer):
            # this lexer is broken and yields an unstoppable process, see
            # https://bitbucket.org/birkenfeld/pygments-main/issue/706/nemerle-lexer-ends-in-an-infinite-loop
            lexer = TextLexer()

        # Not sure if this should be skipped when the language uses
        # off-side rules (e.g. python, see
        # http://en.wikipedia.org/wiki/Off-side_rule for a list).
        stripped_code = _strip_lines(file_content)
        lexer_output = _iterate_lexer_output(lexer.get_tokens(stripped_code))
        line_types_str = _comment_empty_or_code(lexer_output)
        line_types = line_types_str.split("\n")

    return line_types
Example #3
0
    def __init__(self, repo, uri):
        """Collect per-commit added/removed line counts from git.

        Runs ``git log --numstat`` over the repository at *uri* and
        parses its output.  Raises ExtensionRunError when git is not
        available or the command fails.
        """
        LineCounter.__init__(self, repo, uri)

        # Raw strings: the original plain strings relied on Python not
        # treating \w and \d as escapes.
        self.commit_pattern = re.compile(r"^(\w+) ")
        self.file_pattern = re.compile(r"^(\d+)\s+(\d+)\s+([^\s].*)$")

        # Dictionary for storing added, removed pairs, keyed by commit.
        self.lines = {}
        # Dictionary for storing list of paths, keyed by commit.
        self.paths = {}
        # Dictionary for storing added, removed pairs, keyed by commit
        # and path.
        self.lines_files = {}

        # Run git command
        self.git = find_program('git')
        if self.git is None:
            raise ExtensionRunError("Error running CommitsLOCDet extension: " +
                                    "required git command cannot be found in path")
        cmd = [self.git, 'log',
               '--all', '--topo-order', '--numstat', '--pretty=oneline']
        c = Command(cmd, uri)
        try:
            c.run(parser_out_func=self.__parse_line)
        except CommandError as e:
            if e.error:
                printerr("Error running git log command: %s", (e.error,))
            # Bug fix: interpolate here; passing (fmt, arg) to the
            # exception constructor never formatted the %s placeholder.
            raise ExtensionRunError("Error running "
                                    "CommitsLOCDet extension: %s" % str(e))
Example #4
0
    def run (self, repo, repo_uri):
        """Run blame for ``self.path`` at ``self.rev`` and collect results.

        Sets ``self.failed`` when the repository command errors out.
        """
        profiler_start("Running BlameJob for %s@%s", (self.path,self.rev))
        def blame_line (line, p):
            # Watch callback: feed each blame output line to the parser.
            p.feed (line)

        repo_type = repo.get_type ()
        if repo_type == 'cvs':
            # CVS paths contain the module stuff
            uri = repo.get_uri_for_path (repo_uri)
            module = uri[len (repo.get_uri ()):].strip ('/')

            if module != '.':
                path = self.path[len (module):].strip ('/')
            else:
                path = self.path.strip ('/')
        else:
            path = self.path.strip ('/')

        p = create_parser (repo.get_type (), self.path)
        out = self.get_content_handler()
        p.set_output_device (out)
        wid = repo.add_watch (BLAME, blame_line, p)
        try:
            repo.blame (os.path.join (repo_uri, path), self.rev)
            self.collect_results(out)
        except RepositoryCommandError as e:
            self.failed = True
            printerr ("Command %s returned %d (%s)", (e.cmd, e.returncode, e.error))
        finally:
            # Bug fix: always detach the watch; the original leaked it,
            # so it kept firing for later repository commands.
            repo.remove_watch (BLAME, wid)
Example #5
0
    def listen_for_data(self, repo_func, watcher):
        """Run *repo_func* for self.path@self.rev, capturing the output
        that the repository emits through *watcher* into a BytesIO
        buffer.  Retries non-git repositories up to three times.
        """
        def write_line(data, io):
            # Watch callback: accumulate command output into the buffer.
            io.write(data)

        io = BytesIO()

        wid = self.repo.add_watch(watcher, write_line, io)

        # Git doesn't need retries because all of the revisions
        # are already on disk
        if self.repo_type == "git":
            retries = 0
        else:
            retries = 3

        done = False
        failed = False
        # Try downloading the file revision
        while not done and not failed:
            try:
                repo_func(os.path.join(self.repo_uri, self.path), self.rev)
                done = True
            except RepositoryCommandError, e:
                if retries > 0:
                    printerr("Command %s returned %d(%s), try again", (e.cmd, e.returncode, e.error))
                    retries -= 1
                    # NOTE(review): seek(0) without truncate() can leave
                    # stale bytes if the retry writes less output — verify.
                    io.seek(0)
                elif retries == 0:
                    failed = True
                    printerr(
                        "Error obtaining %s@%s. " + "Command %s returned %d(%s)",
                        (self.path, self.rev, e.cmd, e.returncode, e.error),
                    )
            # NOTE(review): snippet is truncated here — the bare except
            # handler body (and any remove_watch cleanup) is not visible.
            except:
Example #6
0
   def _do_backout(self, repo, uri, db, backout_statement):
       """Run *backout_statement* to remove this extension's rows for the
       repository identified by *uri*.

       Returns True when the repository is not in the database (nothing
       to back out).  Raises ExtensionBackoutError when the backout
       statement fails.
       """
       connection = db.connect()
       repo_cursor = connection.cursor()
       repo_uri = get_repo_uri(uri, repo)

       try:
           try:
               repo_id = get_repo_id(repo_uri, repo_cursor, db)
           finally:
               # The lookup cursor is no longer needed either way.
               repo_cursor.close()
       except RepoNotFound:
           # Repository isn't in there, so it's likely already backed out
           printerr("Repository not found, is it in the database?")
           # Bug fix: close the connection on this early return too;
           # the original leaked it.
           connection.close()
           return True

       update_cursor = connection.cursor()

       execute_statement(statement(backout_statement, db.place_holder),
                         (repo_id,),
                         update_cursor,
                         db,
                         "Couldn't backout extension",
                         exception=ExtensionBackoutError)
       update_cursor.close()
       connection.commit()
       connection.close()
Example #7
0
    def __prepare_table(self, connection, drop_table=False):
        """Prepare the hunks table; only the drop-old-data part of this
        method is visible in this snippet."""
        cursor = connection.cursor()

        # Drop the table's old data
        if drop_table:
            try:
                cursor.execute("DROP TABLE hunks")
            except Exception, e:
                # Best-effort drop: the table may simply not exist yet.
                printerr("Couldn't drop hunks table because %s", (e,))
Example #8
0
    def __prepare_table(self, connection, drop_table=False):
        """Prepare the content table; only the drop-old-data part of
        this method is visible in this snippet."""
        # Drop the table's old data
        if drop_table:
            cursor = connection.cursor()

            try:
                cursor.execute("DROP TABLE content")
            except Exception, e:
                # Best-effort drop: the table may simply not exist yet.
                printerr("Couldn't drop content table because %s", (e,))
            # NOTE(review): snippet is truncated — the finally body
            # (presumably cursor cleanup) is not visible.
            finally:
Example #9
0
    def __prepare_table(self, connection, drop_table=False):
        """Prepare the content table; only the drop-old-data part of
        this method is visible in this snippet."""
        # Drop the table's old data
        if drop_table:
            cursor = connection.cursor()

            try:
                cursor.execute("DROP TABLE content")
            except Exception, e:
                # Best-effort drop: the table may simply not exist yet.
                printerr("Couldn't drop content table because %s", (e, ))
            # NOTE(review): snippet is truncated — the finally body
            # (presumably cursor cleanup) is not visible.
            finally:
Example #10
0
def get_extension(extension_name):
    """Return the extension registered under *extension_name*.

    Importing the extension's module on first use is expected to add it
    to the ``_extensions`` registry; raises ExtensionUnknownError when
    the name never shows up there.
    """
    if extension_name not in _extensions:
        module = "pycvsanaly2.extensions.%s" % extension_name
        try:
            __import__(module)
        except ImportError as e:
            printerr("Error in importing extension %s: %s", (extension_name, str(e)))

    try:
        return _extensions[extension_name]
    except KeyError:
        raise ExtensionUnknownError('Extension %s not registered' % extension_name)
Example #11
0
    def get_patch_for_commit(self, rev):
        """Fetch the diff text for *rev* by watching the repository's
        DIFF output; only part of this method is visible here."""
        def diff_line(data, io):
            # Watch callback: accumulate diff output into the buffer.
            io.write(data)

        io = StringIO()
        wid = self.repo.add_watch(DIFF, diff_line, io)
        try:
            self.repo.show(self.repo_uri, rev)
            data = io.getvalue()
        except Exception, e:
            printerr("Error running show command: %s", [str(e)])
            data = None
        # NOTE(review): snippet truncated — removal of watch `wid` and
        # the return are not visible in this span.
Example #12
0
    def get_patch_for_commit (self, rev):
        """Fetch the diff text for *rev* by watching the repository's
        DIFF output; only part of this method is visible here."""
        def diff_line (data, io):
            # Watch callback: accumulate diff output into the buffer.
            io.write (data)

        io = StringIO ()
        wid = self.repo.add_watch (DIFF, diff_line, io)
        try:
            self.repo.show (self.repo_uri, rev)
            data = io.getvalue ()
        except Exception, e:
            # NOTE(review): (str (e)) is not a tuple; printerr elsewhere
            # receives (str(e),) — verify the expected argument shape.
            printerr ("Error running show command: %s", (str (e)))
            data = None
        # NOTE(review): snippet truncated — removal of watch `wid` and
        # the return are not visible in this span.
Example #13
0
def iter_file_patch(iter_lines, allow_dirty=False):
    '''
    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch.  Note that the algorithm here can only find
        such text before any patches have been found.  Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True.  Default False.
    '''
    ### FIXME: Docstring is not quite true.  We allow certain comments no
    # matter what, If they startwith '===', '***', or '#' Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    regex = re.compile(binary_files_re)
    saved_lines = []
    orig_range = 0
    beginning = True
    for line in iter_lines:
        if line.startswith('=== ') or line.startswith('*** '):
            continue
        if line.startswith('#'):
            continue
        elif orig_range > 0:
            # Still inside a hunk: consume original-side lines.
            if line.startswith('-') or line.startswith(' '):
                orig_range -= 1
        elif line.startswith('--- ') or regex.match(line):
            # A new file patch starts: emit the previous one, if any.
            if allow_dirty and beginning:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                beginning = False
            elif len(saved_lines) > 0:
                yield saved_lines
            saved_lines = []
        elif line.startswith('@@'):
            try:
                hunk = hunk_from_header(line)
            except MalformedHunkHeader as e:
                if allow_dirty:
                    printerr(
                        "\nError: MalformedHunkHeader; Probably merge commit. Skipping."
                    )
                    continue
                raise e
            orig_range = hunk.orig_range
        saved_lines.append(line)
    # Bug fix: without this the final patch in the stream was dropped,
    # since patches were only yielded when the *next* one started.
    if len(saved_lines) > 0:
        yield saved_lines
Example #14
0
def iter_file_patch(iter_lines, allow_dirty=False):
    '''
    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch.  Note that the algorithm here can only find
        such text before any patches have been found.  Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True.  Default False.
    '''
    ### FIXME: Docstring is not quite true.  We allow certain comments no
    # matter what, If they startwith '===', '***', or '#' Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    regex = re.compile(binary_files_re)
    saved_lines = []
    orig_range = 0
    beginning = True
    for line in iter_lines:
        if line.startswith('=== ') or line.startswith('*** '):
            continue
        if line.startswith('#'):
            continue
        elif orig_range > 0:
            # Still inside a hunk: consume original-side lines.
            if line.startswith('-') or line.startswith(' '):
                orig_range -= 1
        elif line.startswith('--- ') or regex.match(line):
            # A new file patch starts: emit the previous one, if any.
            if allow_dirty and beginning:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                beginning = False
            elif len(saved_lines) > 0:
                yield saved_lines
            saved_lines = []
        elif line.startswith('@@'):
            try:
                hunk = hunk_from_header(line)
            except MalformedHunkHeader as e:
                if allow_dirty:
                    printerr("\nError: MalformedHunkHeader; Probably merge commit. Skipping.")
                    continue
                raise e
            orig_range = hunk.orig_range
        saved_lines.append(line)
    # Bug fix: without this the final patch in the stream was dropped,
    # since patches were only yielded when the *next* one started.
    if len(saved_lines) > 0:
        yield saved_lines
Example #15
0
def _get_file_content(repo, uri, rev):
    """Read the content of *uri* at revision *rev* from *repo*.

    Returns the file content as a unicode string with normalised line
    breaks, or None when the repository command fails.
    """
    def write_line(data, io):
        # Watch callback: accumulate cat output into the buffer.
        io.write(data)

    io = BytesIO()
    wid = repo.add_watch(CAT, write_line, io)
    try:
        repo.cat(uri, rev)
        file_content = to_utf8(io.getvalue()).decode("utf-8")
        # Make sure we do have the same new lines everywhere.
        file_content = _convert_linebreaks(file_content)
    except Exception as e:
        # Bug fix: the message used to say "show" but the command run
        # here is cat.
        printerr("[get_line_types] Error running cat command: %s, FAILED", (str(e),))
        file_content = None
    finally:
        # Always detach the watch, even on unexpected errors.
        repo.remove_watch(CAT, wid)
    return file_content
Example #16
0
    def __init__(self, repo, uri):
        """Collect per-commit changed line counts from git.

        Runs ``git log --shortstat`` over the repository at *uri* and
        parses its output.  Raises ExtensionRunError when git is not
        available or the command fails.
        """
        LineCounter.__init__(self, repo, uri)

        self.git = find_program('git')
        if self.git is None:
            raise ExtensionRunError("Error running CommitsLOC extension: " +
                                    "required git command cannot be found in path")

        # Added/removed line counts, keyed by commit.
        self.lines = {}

        cmd = [self.git, 'log', '--all', '--topo-order', '--shortstat', '--pretty=oneline', 'origin']
        c = Command(cmd, uri)
        try:
            c.run(parser_out_func=self.__parse_line)
        except CommandError as e:
            if e.error:
                printerr("Error running git log command: %s", (e.error,))
            # Bug fix: interpolate here; passing (fmt, arg) to the
            # exception constructor never formatted the %s placeholder.
            raise ExtensionRunError("Error running CommitsLOC extension: %s" % str(e))
Example #17
0
def iter_hunks(iter_lines, allow_dirty=False):
    '''
    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk).  Default False
    '''
    hunk = None
    for line in iter_lines:
        if line == "\n":
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end.  Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        # Consume the hunk body until both sides are fully accounted for.
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                hunk_line = parse_line(iter_lines.next())
                hunk.lines.append(hunk_line)
                if isinstance(hunk_line, (RemoveLine, ContextLine)):
                    orig_size += 1
                if isinstance(hunk_line, (InsertLine, ContextLine)):
                    mod_size += 1
            except StopIteration:
                break
            except MalformedLine as e:
                if allow_dirty:
                    printerr(
                        "\nError: MalformedLine; Probably binary file. Skipping line."
                    )
                    continue
                raise e
    # Bug fix: the last parsed hunk was dropped when the input did not
    # end with a blank line or another hunk header.
    if hunk is not None:
        yield hunk
Example #18
0
    def __process_finished_jobs(self, job_pool, connection, db):
        """Drain every finished job from *job_pool*, inserting each
        file's content row into the content table; returns the number
        of jobs processed."""
        # Pick the IntegrityError class matching the active DB driver.
        if isinstance(self.db, SqliteDatabase):
            from sqlite3 import IntegrityError
        elif isinstance(self.db, MysqlDatabase):
            from MySQLdb import IntegrityError

        # commit_id is the commit ID. For some reason, the
        # documentation advocates tablename_id as the reference,
        # but in the source, these are referred to as commit IDs.
        # Don't ask me why!
        query = """
                insert into content(commit_id, file_id, content, loc, size) 
                    values(?,?,?,?,?)"""

        write_cursor = connection.cursor()
        processed_jobs = 0
        while True:
            finished_job = job_pool.get_next_done(0)
            if finished_job is None:
                break
            insert_statement = statement(query, db.place_holder)
            parameters = (
                finished_job.commit_id,
                finished_job.file_id,
                finished_job.file_contents,
                finished_job.file_number_of_lines,
                finished_job.file_size,
            )
            try:
                write_cursor.execute(insert_statement, parameters)
            except IntegrityError as e:
                if isinstance(self.db, MysqlDatabase) and e.args[0] == 1062:
                    # Ignore duplicate entry
                    pass
                else:
                    printerr(
                        "Error while inserting content for file %d @ commit %d"
                        % (finished_job.file_id, finished_job.commit_id)
                    )
                    raise
            processed_jobs += 1

        connection.commit()
        write_cursor.close()

        return processed_jobs
Example #19
0
def _get_file_content(repo, uri, rev):
    """Read the content of *uri* at revision *rev* from *repo*.

    Returns the file content as a unicode string with normalised line
    breaks, or None when the repository command fails.
    """
    def write_line(data, io):
        # Watch callback: accumulate cat output into the buffer.
        io.write(data)

    io = BytesIO()
    wid = repo.add_watch(CAT, write_line, io)
    try:
        repo.cat(uri, rev)
        file_content = to_utf8(io.getvalue()).decode("utf-8")
        # Make sure we do have the same new lines everywhere.
        file_content = _convert_linebreaks(file_content)
    except Exception as e:
        # Bug fix: the message used to say "show" but the command run
        # here is cat.
        printerr("[get_line_types] Error running cat command: %s, FAILED",
                 (str(e),))
        file_content = None
    finally:
        # Always detach the watch, even on unexpected errors.
        repo.remove_watch(CAT, wid)
    return file_content
Example #20
0
def iter_hunks(iter_lines, allow_dirty=False):
    '''
    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk).  Default False
    '''
    hunk = None
    for line in iter_lines:
        if line == "\n":
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end.  Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        # Consume the hunk body until both sides are fully accounted for.
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                hunk_line = parse_line(iter_lines.next())
                hunk.lines.append(hunk_line)
                if isinstance(hunk_line, (RemoveLine, ContextLine)):
                    orig_size += 1
                if isinstance(hunk_line, (InsertLine, ContextLine)):
                    mod_size += 1
            except StopIteration:
                break
            except MalformedLine as e:
                if allow_dirty:
                    printerr("\nError: MalformedLine; Probably binary file. Skipping line.")
                    continue
                raise e
    # Bug fix: the last parsed hunk was dropped when the input did not
    # end with a blank line or another hunk header.
    if hunk is not None:
        yield hunk
Example #21
0
    def run(self, repo, repo_uri):
        """Run blame restricted to the line span covered by ``self.hunks``
        for ``self.path`` at ``self.rev``, collecting results via the
        content handler.  Sets ``self.failed`` on command errors.
        """
        profiler_start("Running HunkBlameJob for %s@%s", (self.path, self.rev))

        def blame_line(line, p):
            # Watch callback: feed each blame output line to the parser.
            p.feed(line)

        # Smallest line range covering all hunks.
        # NOTE(review): sys.maxint is Python-2 only.
        start = sys.maxint
        end = 0
        for hunk in self.hunks:
            if hunk[1] < start:
                start = hunk[1]
            if hunk[2] > end:
                end = hunk[2]

        repo_type = repo.get_type()
        if repo_type == 'cvs':
            # CVS paths contain the module stuff
            uri = repo.get_uri_for_path(repo_uri)
            module = uri[len(repo.get_uri()):].strip('/')

            if module != '.':
                path = self.path[len(module):].strip('/')
            else:
                path = self.path.strip('/')
        else:
            path = self.path.strip('/')

        p = create_parser(repo.get_type(), self.path)
        out = self.get_content_handler()
        p.set_output_device(out)
        wid = repo.add_watch(BLAME, blame_line, p)
        try:
            repo.blame(os.path.join(repo_uri, path),
                       self.rev,
                       start=start,
                       end=end)
            self.collect_results(out)
        except RepositoryCommandError as e:
            self.failed = True
            printerr("Command %s returned %d (%s)",
                     (e.cmd, e.returncode, e.error))
        finally:
            # Bug fix: always detach the watch; the original leaked it,
            # so it kept firing for later repository commands.
            repo.remove_watch(BLAME, wid)
Example #22
0
    def __process_finished_jobs(self, job_pool, connection, db):
        """Drain every finished job from *job_pool*, inserting each
        file's content row into the content table; returns the number
        of jobs processed."""
        # Pick the IntegrityError class matching the active DB driver.
        if isinstance(self.db, SqliteDatabase):
            from sqlite3 import IntegrityError
        elif isinstance(self.db, MysqlDatabase):
            from MySQLdb import IntegrityError
        write_cursor = connection.cursor()
        finished_job = job_pool.get_next_done(0)
        processed_jobs = 0
        # commit_id is the commit ID. For some reason, the 
        # documentation advocates tablename_id as the reference,
        # but in the source, these are referred to as commit IDs.
        # Don't ask me why!
        while finished_job is not None:
            query = """
                insert into content(commit_id, file_id, content, loc, size) 
                    values(?,?,?,?,?)"""
            insert_statement = statement(query, db.place_holder)
            parameters = (finished_job.commit_id,
                          finished_job.file_id,
                          finished_job.file_contents,
                          finished_job.file_number_of_lines,
                          finished_job.file_size)
            try:
                write_cursor.execute(insert_statement, parameters)
            except IntegrityError as e:
                if isinstance(self.db, MysqlDatabase) and e.args[0] == 1062:
                    # Ignore duplicate entry
                    pass
                else:
                    printerr(
                        'Error while inserting content for file %d @ commit %d'
                        % (finished_job.file_id, finished_job.commit_id))
                    raise

            processed_jobs += 1
            finished_job = job_pool.get_next_done(0)

        connection.commit()
        write_cursor.close()

        return processed_jobs
Example #23
0
def get_line_types(repo, repo_uri, rev, path):
    """Classify each line of a file revision.

    Returns a list with one entry per line, each labelled 'code',
    'comment' or 'empty', or None when the file content could not be
    fetched.
    """
    # Concat repo_uri and file_path for the full path of the file.
    uri = os.path.join(repo_uri, path)
    file_content = _get_file_content(repo, uri, rev)

    if file_content is None or file_content == '':
        printerr("[get_line_types] Error: No file content for " + str(rev) +
                 ":" + str(path) + " found! Skipping.")
        line_types = None
    else:
        try:
            lexer = get_lexer_for_filename(path)
        except ClassNotFound:
            try:
                # Bug fix: added the missing space after "for" in the
                # debug message ("for" + rev ran the words together).
                printdbg("[get_line_types] Guessing lexer for " + str(rev) +
                         ":" + str(path) + ".")
                lexer = guess_lexer(file_content)
            except ClassNotFound:
                printdbg("[get_line_types] No guess or lexer found for " +
                         str(rev) + ":" + str(path) +
                         ". Using TextLexer instead.")
                lexer = TextLexer()

        if isinstance(lexer, NemerleLexer):
            # this lexer is broken and yields an unstoppable process, see
            # https://bitbucket.org/birkenfeld/pygments-main/issue/706/nemerle-lexer-ends-in-an-infinite-loop
            lexer = TextLexer()

        # Not sure if this should be skipped when the language uses
        # off-side rules (e.g. python, see
        # http://en.wikipedia.org/wiki/Off-side_rule for a list).
        stripped_code = _strip_lines(file_content)
        lexer_output = _iterate_lexer_output(lexer.get_tokens(stripped_code))
        line_types_str = _comment_empty_or_code(lexer_output)
        line_types = line_types_str.split("\n")

    return line_types
Example #24
0
    def get_lines_for_revision(self, revision):
        """Compute added/removed line counts for *revision* by diffing it
        against the previous revision; only part of this method is
        visible in this snippet."""

        revision = int(revision)

        def diff_line(data, diff_data_l):
            # Watch callback: append diff output to the single-element
            # list used as a mutable accumulator.
            diff_data = diff_data_l[0]
            diff_data += data
            diff_data_l[0] = diff_data

        revs = []
        revs.append("%d" % (revision - 1))
        revs.append("%d" % (revision))
        # NOTE(review): dict.update() returns None, so `env` is None here
        # and LC_ALL=C is never actually applied — probable bug; verify.
        env = os.environ.copy().update({'LC_ALL': 'C'})
        pipe = Popen(self.diffstat, shell=False, stdin=PIPE, stdout=PIPE, close_fds=True, env=env)
        diff_data = [""]
        wid = self.repo.add_watch(DIFF, diff_line, diff_data)
        try:
            self.repo.diff(self.repo.get_uri(), revs=revs, no_whitespace=True)
        except Exception, e:
            printerr("Error running svn diff command: %s", [str(e)])
            self.repo.remove_watch(DIFF, wid)
            return (0, 0)
        # NOTE(review): snippet truncated — the success path (feeding
        # `pipe`, removing the watch, returning counts) is not visible.
Example #25
0
    def run(self, repo, repo_uri):
        """List the repository at self.rev, capturing LS output into a
        BytesIO buffer; only part of this method is visible here."""
        def write_line(data, io):
            # Watch callback: accumulate listing output into the buffer.
            io.write(data)

        self.repo = repo
        self.repo_uri = repo_uri
        self.repo_type = self.repo.get_type()

        io = BytesIO()
        wid = repo.add_watch(LS, write_line, io)

        # Git doesn't need retries because all of the revisions
        # are already on disk
        if self.repo_type == 'git':
            retries = 0
        else:
            retries = 3

        done = False
        failed = False

        # Try downloading the file listing
        while not done and not failed:
            try:
                self.repo.ls(self.repo_uri, self.rev)
                done = True
            except RepositoryCommandError, e:
                if retries > 0:
                    printerr("Command %s returned %d(%s), try again",\
                            (e.cmd, e.returncode, e.error))
                    retries -= 1
                    # NOTE(review): seek(0) without truncate() can leave
                    # stale bytes if the retry writes less output — verify.
                    io.seek(0)
                elif retries == 0:
                    failed = True
                    printerr("Error obtaining %s@%s. " +
                                "Command %s returned %d(%s)", \
                                (self.path, self.rev, e.cmd, \
                                e.returncode, e.error))
            except Exception, e:
                failed = True
                printerr("Error obtaining %s@%s. Exception: %s", \
                        (self.path, self.rev, str(e)))
        # NOTE(review): snippet truncated — removal of watch `wid` is not
        # visible in this span.
Example #26
0
    def run(self, repo, repo_uri):
        """List the repository at self.rev, capturing LS output into a
        BytesIO buffer; only part of this method is visible here."""
        def write_line(data, io):
            # Watch callback: accumulate listing output into the buffer.
            io.write(data)

        self.repo = repo
        self.repo_uri = repo_uri
        self.repo_type = self.repo.get_type()

        io = BytesIO()
        wid = repo.add_watch(LS, write_line, io)

        # Git doesn't need retries because all of the revisions
        # are already on disk
        if self.repo_type == 'git':
            retries = 0
        else:
            retries = 3

        done = False
        failed = False

        # Try downloading the file listing
        while not done and not failed:
            try:
                self.repo.ls(self.repo_uri, self.rev)
                done = True
            except RepositoryCommandError, e:
                if retries > 0:
                    printerr("Command %s returned %d(%s), try again",\
                            (e.cmd, e.returncode, e.error))
                    retries -= 1
                    # NOTE(review): seek(0) without truncate() can leave
                    # stale bytes if the retry writes less output — verify.
                    io.seek(0)
                elif retries == 0:
                    failed = True
                    printerr("Error obtaining %s@%s. " +
                                "Command %s returned %d(%s)", \
                                (self.path, self.rev, e.cmd, \
                                e.returncode, e.error))
            except Exception, e:
                failed = True
                printerr("Error obtaining %s@%s. Exception: %s", \
                        (self.path, self.rev, str(e)))
        # NOTE(review): snippet truncated — removal of watch `wid` is not
        # visible in this span.
Example #27
0
def main(argv):
    """Entry point: parse command-line options and validate the environment.

    Returns 1 on any configuration error (missing required environment
    variable or a bad command-line option); otherwise continues with
    database setup (beyond the visible span of this snippet).
    """
    # Short (one letter) options. Those requiring argument followed by :
    short_opts = "hu:p:d:H:w:"
    # Long options (all started by --). Those requiring argument followed by =
    # FIX(review): this list had been mangled by a credential-scrubbing pass
    # ("******" had replaced the separators between the string literals,
    # which is not even valid Python syntax).  The option names below are
    # reconstructed from the short options and the defaults parsed further
    # down -- confirm against the project's original option handling.
    long_opts = [
        "help", "db-user=", "db-password=", "db-hostname=", "db-database=",
        "db-driver=", "window="
    ]

    # Default options
    # NOTE(review): the default user name was scrubbed to "******" --
    # restore the real default before shipping.
    user = "******"
    passwd = None
    hostname = "localhost"
    database = "cvsanaly"
    driver = "mysql"
    slidingTimeWindow = 120
    projectName = None
    repositoryUri = None

    # Each external tool must be located through its environment variable;
    # abort early with a usage hint if any is missing.
    fetchPath = os.getenv("FETCH")
    if fetchPath is None:
        printerr("FETCH environment variable not set")
        printerr("Use " + sys.argv[0] + " --help for details")
        return 1

    snavPath = os.getenv("SN_HOME")
    if snavPath is None:
        printerr("SN_HOME environment variable not set")
        printerr("Use " + sys.argv[0] + " --help for details")
        return 1

    pmcPath = os.getenv("PMC")
    if pmcPath is None:
        printerr("PMC environment variable not set")
        printerr("Use " + sys.argv[0] + " --help for details")
        return 1

    ldiffPath = os.getenv("LDIFF")
    if ldiffPath is None:
        printerr("LDIFF environment variable not set")
        printerr("Use " + sys.argv[0] + " --help for details")
        return 1

    try:
        opts, args = getopt.getopt(argv[1:], short_opts, long_opts)
    except getopt.GetoptError as e:
        printerr(str(e))
        return 1
Example #28
0
                    io.seek(0)
                elif retries == 0:
                    failed = True
                    printerr("Error obtaining %s@%s. " +
                                "Command %s returned %d(%s)", \
                                (self.path, self.rev, e.cmd, \
                                e.returncode, e.error))
            except Exception, e:
                failed = True
                printerr("Error obtaining %s@%s. Exception: %s", \
                        (self.path, self.rev, str(e)))

        self.repo.remove_watch(LS, wid)

        if failed:
            printerr("Failure due to error")
        else:
            try:
                self.ls_lines = io.getvalue().splitlines()

                if Config().count_types:
                    self.ls_lines = [
                        fp for fp in self.ls_lines
                        if guess_file_type(fp) in Config().count_types
                    ]
            except Exception, e:
                printerr("Error getting ls-lines." + "Exception: %s",
                         (str(e), ))
            finally:
                io.close()
Example #29
0
    def run(self, repo, uri, db):
        """Fill in the commits_lines table.

        Create a counter to find number of lines added and removed
        for each commit in repo,
        create an object to manage the commits_lines table,
        for each commit in repo, create an entry in commit_lines table
        (except for those that already were in the table).
        """

        cnn = db.connect()
        # Cursor for reading from the database
        cursor = cnn.cursor()
        # Cursor for writing to the database
        write_cursor = cnn.cursor()
        repo_id = self._get_repo_id(repo, uri, cursor)
        # Counter to find lines added, removed for each commit
        counter = create_line_counter_for_repository(repo, uri)
        # Object to manage the commits_lines table
        theTableComLines = TableComLines(db, cnn, repo_id)
        # Object to manage the commits_files_lines table
        theTableComFilLines = TableComFilLines(db, cnn, repo_id)

        # FIX: the placeholder used to be quoted ('%s').  DB-API drivers
        # quote bound parameters themselves, so the extra quotes produced a
        # double-quoted literal and the query matched nothing.  (Assumes a
        # driver with "format" paramstyle, e.g. MySQLdb -- as the original
        # hard-coded %s already did.)
        cursor.execute("SELECT id, rev, composed_rev " +
                       "FROM scmlog WHERE repository_id = %s",
                       (repo_id,))
        rows_left = True
        while rows_left:
            rows = cursor.fetchmany()
            for id, revision, composed_rev in rows:
                # Composed revisions look like "<commit>|<extra>"; only the
                # commit part identifies the revision to the counter.
                if composed_rev:
                    commit = revision.split("|")[0]
                else:
                    commit = revision
                cadded = cremoved = 0
                if not theTableComLines.in_table(id):
                    (cadded, cremoved) = counter.get_lines_for_commit(commit)
                    theTableComLines.add_pending_row((None, id,
                                                      cadded, cremoved))
                # Per-file totals; these must add up to the per-commit
                # numbers for the sanity check below.
                tadded = tremoved = 0
                for path in counter.get_paths_for_commit(commit):
                    if not theTableComFilLines.in_table(str(id) + ',' + path):
                        (added, removed) = \
                            counter.get_lines_for_commit_file(commit, path)
                        theTableComFilLines.add_pending_row((None, id, path,
                                                             added, removed))
                        tadded += int(added)
                        tremoved += int(removed)
                # Sanity check
                if (cadded != tadded) or (cremoved != tremoved):
                    printerr("Sanity check failed: %d, %s, %d, %d, %d, %d" %
                             (id, commit, cadded, tadded, cremoved, tremoved))
                    printerr(counter.get_paths_for_commit(commit))
            theTableComLines.insert_rows(write_cursor)
            theTableComFilLines.insert_rows(write_cursor)
            if not rows:
                rows_left = False
        cnn.commit()
        write_cursor.close()
        cursor.close()
        cnn.close()
Example #30
0
            for hunk in self.get_commit_data(patch_content):
                # Get the file ID from the database for linking
                hunk_file_name = re.sub(r'^[ab]\/', '',
                                        hunk.file_name.strip())
                file_id = fp.get_file_id(hunk_file_name, commit_id)

                if file_id == None:
                    printdbg("file not found")
                    if repo.type == "git":
                        # The liklihood is that this is a merge, not a
                        # missing ID from some data screwup.
                        # We'll just continue and throw this away
                        continue
                    else:
                        printerr("No file ID found for hunk " + \
                                 hunk_file_name + \
                                 " at commit " + str(commit_id))

                insert = """insert into hunks(file_id, commit_id,
                            old_start_line, old_end_line, new_start_line,
                            new_end_line)
                            values(?,?,?,?,?,?)"""

                execute_statement(statement(insert, db.place_holder),
                                  (file_id, commit_id,
                                   hunk.old_start_line,
                                   hunk.old_end_line,
                                   hunk.new_start_line,
                                   hunk.new_end_line),
                                   write_cursor,
                                   db,
Example #31
0
                for hunk in self.get_commit_data(patch_content):
                    # Get the file ID from the database for linking
                    hunk_file_name = re.sub(r'^[ab]\/', '',
                                            hunk.file_name.strip())
                    file_id = fp.get_file_id(hunk_file_name, commit_id)

                    if file_id == None:
                        printdbg("file not found")
                        if repo.type == "git":
                            # The liklihood is that this is a merge, not a
                            # missing ID from some data screwup.
                            # We'll just continue and throw this away
                            continue
                        else:
                            printerr("No file ID found for hunk " + \
                                     hunk_file_name + \
                                     " at commit " + commit_id)

                    insert = """insert into hunks(file_id, commit_id,
                                old_start_line, old_end_line, new_start_line, 
                                new_end_line)
                                values(?,?,?,?,?,?)"""

                    execute_statement(statement(insert, db.place_holder),
                                      (file_id, commit_id, hunk.old_start_line,
                                       hunk.old_end_line, hunk.new_start_line,
                                       hunk.new_end_line),
                                      write_cursor,
                                      db,
                                      "Couldn't insert hunk, dup record?",
                                      exception=ExtensionRunError)
Example #32
0
    # Configuration & initialisation of output
    cdifWriter.writeMooseCompliantCdif()
    outputHandle = open(outputFile, "w")
    outputHandle.close()
    outputHandle = open(outputFile, "a+")

    EvoCdifWriter.setOutputHandle(outputHandle)
    cdifWriter.set_outputhandle(outputHandle)
    cdifWriter.initializeIDCounter()
    cdifWriter.generateHeader(True, "evofetch", projectName, "4", "C++",
                              "ISO98")

    try:
        db = create_database(driver, database, user, passwd, hostname)
    except AccessDenied, e:
        printerr("Error creating database: %s", (e.message))
        return 1
    except DatabaseNotFound:
        printerr(
            "Database %s doesn't exist. It must be created before running cvsanaly",
            (database))
        return 1
    except DatabaseDriverNotSupported:
        printerr("Database driver %s is not supported by cvsanaly", (driver))
        return 1

    cnn = db.connect()

    try:
        repositoryID = findRepository(db, cnn, projectName)
    except ProjectNotFound:
Example #33
0
                    retries -= 1
                    io.seek(0)
                elif retries == 0:
                    failed = True
                    printerr("Error obtaining %s@%s. " +
                                "Command %s returned %d(%s)", \
                                (self.path, self.rev, e.cmd, \
                                e.returncode, e.error))
            except Exception, e:
                failed = True
                printerr("Error obtaining %s@%s. Exception: %s", \
                        (self.path, self.rev, str(e)))
        self.repo.remove_watch(watcher, wid)

        if failed:
            printerr("Failure due to error")
        else:
            try:
                results = io.getvalue()
                return results
            except Exception, e:
                printerr("Error getting contents." + "Exception: %s",
                         (str(e), ))
            finally:
                io.close()

    def _get_file_contents(self):
        """Returns contents of the file, stripped of whitespace 
            at either end
            """
        # An encode will fail if the source code can't be converted to
Example #34
0
                                    break
                                    break
                           
                            if possible_file[1] == hunk_file_name:
                                file_id = possible_file[0]
                                break
                                break
    
                    if file_id == None:
                        if repo.type == "git":
                            # The liklihood is that this is a merge, not a
                            # missing ID from some data screwup.
                            # We'll just continue and throw this away
                            continue
                        else:
                            printerr("No file ID found for hunk " + hunk_file_name)
                            
    
                    insert = """insert into hunks(file_id, commit_id,
                                old_start_line, old_end_line, new_start_line, new_end_line)
                                values(?,?,?,?,?,?)"""

                    execute_statement(statement(insert, db.place_holder),
                                      (file_id, commit_id, hunk.old_start_line, \
                                       hunk.old_end_line, hunk.new_start_line, \
                                       hunk.new_end_line),
                                       write_cursor,
                                       db,
                                       "Couldn't insert hunk, duplicate record?",
                                       exception=ExtensionRunError)
                
Example #35
0
                    io.seek(0)
                elif retries == 0:
                    failed = True
                    printerr("Error obtaining %s@%s. " +
                                "Command %s returned %d(%s)", \
                                (self.path, self.rev, e.cmd, \
                                e.returncode, e.error))
            except Exception, e:
                failed = True
                printerr("Error obtaining %s@%s. Exception: %s", \
                        (self.path, self.rev, str(e)))

        self.repo.remove_watch(LS, wid)

        if failed:
            printerr("Failure due to error")
        else:
            try:
                self.ls_lines = io.getvalue().splitlines()

                if Config().count_types:
                    self.ls_lines = [fp for fp in self.ls_lines if
                                     guess_file_type(fp) in 
                                     Config().count_types]
            except Exception, e:
                printerr("Error getting ls-lines." +
                            "Exception: %s", (str(e),))
            finally:
                io.close()
            
    def _get_ls_line_count(self):
Example #36
0
                    retries -= 1
                    io.seek(0)
                elif retries == 0:
                    failed = True
                    printerr("Error obtaining %s@%s. " +
                                "Command %s returned %d(%s)", \
                                (self.path, self.rev, e.cmd, \
                                e.returncode, e.error))
            except Exception, e:
                failed = True
                printerr("Error obtaining %s@%s. Exception: %s", \
                        (self.path, self.rev, str(e)))
        self.repo.remove_watch(watcher, wid)

        if failed:
            printerr("Failure due to error")
        else:
            try:
                results = io.getvalue()
                return results
            except Exception, e:
                printerr("Error getting contents." +
                            "Exception: %s", (str(e),))
            finally:
                io.close()
                
    def _get_file_contents(self):
            """Returns contents of the file, stripped of whitespace 
            at either end
            """
            # An encode will fail if the source code can't be converted to
Example #37
0
         done = True
     except RepositoryCommandError, e:
         if retries > 0:
             printerr("Command %s returned %d(%s), try again",
                     (e.cmd, e.returncode, e.error))
             retries -= 1
             io.seek(0)
         elif retries == 0:
             failed = True
             printerr("Error obtaining %s@%s. " +
                      "Command %s returned %d(%s)",
                      (self.path, self.rev, e.cmd,
                      e.returncode, e.error))
     except:
         failed = True
         printerr("Error obtaining %s@%s.",
                 (self.path, self.rev))
         traceback.print_exc()
         
 self.repo.remove_watch(watcher, wid)
 
 results = None
 if not failed:
     try:
         results = io.getvalue()
     except Exception, e:
         printerr("Error getting contents." +
                  "Exception: %s", (str(e),))
     finally:
         io.close()
 return results
         
Example #38
0
                    
                hunks = [h for h in hunks if h[0] not in blames]
                job = HunkBlameJob(hunks, relative_path, pre_rev)
                
                job_pool.push (job)
                n_blames += 1
        
                if n_blames >= self.MAX_BLAMES:
                    processed_jobs = self.process_finished_jobs (job_pool, write_cursor)
                    n_blames -= processed_jobs
                    if processed_jobs<=self.MAX_BLAMES/5:
                        profiler_start("Joining unprocessed jobs")
                        job_pool.join()
                        profiler_stop("Joining unprocessed jobs", delete=True)
            except NotValidHunkWarning as e:
                printerr("Not a valid hunk: "+str(e))
            finally:
                file_rev = read_cursor.fetchone()

        job_pool.join ()
        self.process_finished_jobs (job_pool, write_cursor, True)

        try:
            self.__drop_cache(cnn)
        except:
            printdbg("Couldn't drop cache because of " + str(e))

        read_cursor.close ()
        write_cursor.close ()
        cnn.close()
Example #39
0
                hunks = [h for h in hunks if h[0] not in blames]
                job = HunkBlameJob(hunks, relative_path, pre_rev)

                job_pool.push(job)
                n_blames += 1

                if n_blames >= self.MAX_BLAMES:
                    processed_jobs = self.process_finished_jobs(
                        job_pool, write_cursor)
                    n_blames -= processed_jobs
                    if processed_jobs <= self.MAX_BLAMES / 5:
                        profiler_start("Joining unprocessed jobs")
                        job_pool.join()
                        profiler_stop("Joining unprocessed jobs", delete=True)
            except NotValidHunkWarning as e:
                printerr("Not a valid hunk: " + str(e))
            finally:
                file_rev = read_cursor.fetchone()

        job_pool.join()
        self.process_finished_jobs(job_pool, write_cursor, True)

        try:
            self.__drop_cache(cnn)
        except:
            printdbg("Couldn't drop cache because of " + str(e))

        read_cursor.close()
        write_cursor.close()
        cnn.close()
Example #40
0
    def run(self, repo, uri, db):
        """Fill in the commits_lines table.

        Create a counter to find number of lines added and removed
        for each commit in repo,
        create an object to manage the commits_lines table,
        for each commit in repo, create an entry in commit_lines table
        (except for those that already were in the table).
        """

        cnn = db.connect()
        # Cursor for reading from the database
        cursor = cnn.cursor()
        # Cursor for writing to the database
        write_cursor = cnn.cursor()
        repo_id = self._get_repo_id(repo, uri, cursor)
        # Counter to find lines added, removed for each commit
        counter = create_line_counter_for_repository(repo, uri)
        # Object to manage the commits_lines table
        theTableComLines = TableComLines(db, cnn, repo_id)
        # Object to manage the commits_files_lines table
        theTableComFilLines = TableComFilLines(db, cnn, repo_id)

        # FIX: the placeholder used to be quoted ('%s'); DB-API drivers
        # quote bound parameters themselves, so the extra quotes produced a
        # double-quoted literal and the query matched nothing.  Parameters
        # are also now passed as a tuple, matching DB-API convention.
        cursor.execute(
            "SELECT id, rev, composed_rev " +
            "FROM scmlog WHERE repository_id = %s", (repo_id,))
        rows_left = True
        while rows_left:
            rows = cursor.fetchmany()
            for id, revision, composed_rev in rows:
                # Composed revisions look like "<commit>|<extra>"; only the
                # commit part identifies the revision to the counter.
                if composed_rev:
                    commit = revision.split("|")[0]
                else:
                    commit = revision
                cadded = cremoved = 0
                if not theTableComLines.in_table(id):
                    (cadded, cremoved) = counter.get_lines_for_commit(commit)
                    theTableComLines.add_pending_row(
                        (None, id, cadded, cremoved))
                # Per-file totals; these must add up to the per-commit
                # numbers for the sanity check below.
                tadded = tremoved = 0
                for path in counter.get_paths_for_commit(commit):
                    if not theTableComFilLines.in_table(str(id) + ',' + path):
                        (added, removed) = \
                            counter.get_lines_for_commit_file(commit, path)
                        theTableComFilLines.add_pending_row(
                            (None, id, path, added, removed))
                        tadded += int(added)
                        tremoved += int(removed)
                # Sanity check
                if (cadded != tadded) or (cremoved != tremoved):
                    printerr("Sanity check failed: %d, %s, %d, %d, %d, %d" %
                             (id, commit, cadded, tadded, cremoved, tremoved))
                    printerr(counter.get_paths_for_commit(commit))
            theTableComLines.insert_rows(write_cursor)
            theTableComFilLines.insert_rows(write_cursor)
            if not rows:
                rows_left = False
        cnn.commit()
        write_cursor.close()
        cursor.close()
        cnn.close()
Example #41
0
    def run(self, repo, repo_uri):
        """Fetch self.path at self.rev from the repository via 'cat'.

        Normalizes the path (stripping the CVS module prefix when needed),
        registers a temporary CAT watch that streams the file contents into
        a BytesIO buffer, and retries on repository command errors for
        remote repository types.  (The teardown of the watch happens after
        the visible span of this snippet.)
        """
        # Watch callback: append each chunk of 'cat' output to the buffer.
        def write_line(data, io):
            io.write(data)

        self.repo = repo
        self.repo_uri = repo_uri
        self.repo_type = self.repo.get_type()

        if self.repo_type == 'cvs':
            # CVS paths contain the module stuff
            # FIX(review): original read "get_uri_for_self.path(...)", an
            # invalid attribute access left by a botched rename -- the
            # method is get_uri_for_path.  Confirm against the repository
            # handler API.
            uri = self.repo.get_uri_for_path(self.repo_uri)
            module = uri[len(self.repo.get_uri()):].strip('/')

            if module != '.':
                self.path = self.path[len(module):].strip('/')
            else:
                self.path = self.path.strip('/')
        else:
            self.path = self.path.strip('/')

        # File extension (including the dot), if any; used later by the
        # rest of the job (beyond the visible span).
        suffix = ''
        filename = os.path.basename(self.path)
        ext_ptr = filename.rfind('.')
        if ext_ptr != -1:
            suffix = filename[ext_ptr:]

        # Buffer that collects the file contents; fed by the CAT watch.
        io = BytesIO()
        wid = self.repo.add_watch(CAT, write_line, io)

        # Git doesn't need retries because all of the revisions
        # are already on disk
        if self.repo_type == 'git':
            retries = 0
        else:
            retries = 3

        done = False
        failed = False
        # Try downloading the file revision
        while not done and not failed:
            try:
                self.repo.cat(os.path.join(self.repo_uri, self.path), self.rev)
                done = True
            except RepositoryCommandError as e:
                if retries > 0:
                    printerr("Command %s returned %d(%s), try again",
                             (e.cmd, e.returncode, e.error))
                    retries -= 1
                    # Rewind so the retry overwrites any partial output.
                    io.seek(0)
                elif retries == 0:
                    # Retries exhausted: record the failure and stop looping.
                    failed = True
                    printerr("Error obtaining %s@%s. " +
                             "Command %s returned %d(%s)",
                             (self.path, self.rev, e.cmd,
                              e.returncode, e.error))
            except Exception as e:
                # Any non-command error is fatal for this job.
                failed = True
                printerr("Error obtaining %s@%s. Exception: %s",
                         (self.path, self.rev, str(e)))
Example #42
0
                repo_func(os.path.join(self.repo_uri, self.path), self.rev)
                done = True
            except RepositoryCommandError, e:
                if retries > 0:
                    printerr("Command %s returned %d(%s), try again", (e.cmd, e.returncode, e.error))
                    retries -= 1
                    io.seek(0)
                elif retries == 0:
                    failed = True
                    printerr(
                        "Error obtaining %s@%s. " + "Command %s returned %d(%s)",
                        (self.path, self.rev, e.cmd, e.returncode, e.error),
                    )
            except:
                failed = True
                printerr("Error obtaining %s@%s.", (self.path, self.rev))
                traceback.print_exc()

        self.repo.remove_watch(watcher, wid)

        results = None
        if not failed:
            try:
                results = io.getvalue()
            except Exception, e:
                printerr("Error getting contents." + "Exception: %s", (str(e),))
            finally:
                io.close()
        return results

    def _get_file_contents(self):