예제 #1
0
파일: Content.py 프로젝트: apepper/cvsanaly
    def __process_finished_jobs(self, job_pool, write_cursor, db):
        """Drain the finished jobs from job_pool into the content table.

        job_pool.get_next_done(0) is called repeatedly until it returns
        None. Each job it hands back is written as one row of the content
        table through write_cursor: commit_id, file_id, the file contents
        (None when Config().no_content is set), the line count and the
        file size.

        execute_statement is given ExtensionRunError as its exception
        argument together with the "duplicate record" message.

        Returns the number of jobs written during this call.
        """
        # commit_id is the commit ID. The documentation advocates
        # tablename_id as the reference, but in the source these are
        # referred to as commit IDs.
        handled = 0

        while True:
            job = job_pool.get_next_done(0)
            if job is None:
                break

            # The contents column stays NULL when content storage is off
            if Config().no_content:
                contents = None
            else:
                contents = str(job.file_contents)

            query = """
                insert into content(commit_id, file_id, content, loc, size) 
                    values(?,?,?,?,?)"""
            insert_statement = statement(query, db.place_holder)
            row = (job.commit_id, job.file_id, contents,
                   job.file_number_of_lines, job.file_size)

            execute_statement(insert_statement, row, write_cursor, db,
                              "Couldn't insert, duplicate record?",
                              exception=ExtensionRunError)
            handled += 1

        return handled
예제 #2
0
    def update_all(self, repo_id):
        """
        Populate the adjacency-matrix cache for every commit of repo_id.

        Commit ids are read from scmlog/actions ordered by date. For each
        commit id that has no entry in the cache yet, update_for_revision
        is run and a deep copy of the resulting 'adj' state is stored
        under str(commit id).

        Pros: File paths in different revisions can be
        accessed randomly, i.e. after calling update_all,
        get_path can be called with any revision in any
        order.
        Cons: It consumes significant memory to store
        the adjacency matrices

        If the config has low_memory set to true, shelve will
        be used instead, to write the cache out to disk.

        The cursor and the connection are closed even when the query or
        update_for_revision raises; the exception itself still propagates.
        """
        profiler_start("Update all file paths")

        if Config().low_memory:
            self.shelve_file_name = str(time()) + "-shelve.db"

            # If there is an old file, shelf will complain viciously
            if os.path.exists(self.shelve_file_name):
                os.remove(self.shelve_file_name)

            self.__dict__['cached_adj'] = shelve.open(self.shelve_file_name,
                                                      writeback=False)

        db = self.__dict__['db']
        cnn = db.connect()
        try:
            cursor = cnn.cursor()
            try:
                query = """select distinct(s.id) from scmlog s, actions a
                    where s.id = a.commit_id and repository_id=?
                    order by s.date"""
                cursor.execute(statement(query, db.place_holder), (repo_id,))

                previous_id = -1
                # Fetch everything up front: the same cursor is handed to
                # update_for_revision inside the loop.
                all_commits = [row[0] for row in cursor.fetchall()]
                for commit_id in all_commits:
                    if commit_id == previous_id:
                        continue

                    key = str(commit_id)
                    if self.__dict__['cached_adj'].get(key) is None:
                        self.update_for_revision(cursor, commit_id, repo_id)
                        self.__dict__['cached_adj'][key] = \
                            deepcopy(self.__dict__['adj'])
                    previous_id = commit_id
            finally:
                cursor.close()
        finally:
            cnn.close()
        profiler_stop("Update all file paths", delete=True)
예제 #3
0
 def close(self):
     """Closes FilePaths to ensure all caches are deleted.

     Does nothing unless Config().low_memory is set. In low-memory mode
     the shelf held in 'cached_adj' is synced and closed, its file
     (self.shelve_file_name) is removed, and 'cached_adj' is reset to an
     empty dict.

     Calling close() again, or calling it when 'cached_adj' is still a
     plain dict, is safe: the shelf steps are skipped when the cache has
     no sync() method, because a dict has neither sync()/close() nor a
     backing file to delete.
     """
     if not Config().low_memory:
         return

     cache = self.__dict__.get('cached_adj')

     # This method itself leaves a plain dict behind (see the reset at the
     # end), so only treat the cache as a shelf when it looks like one.
     if hasattr(cache, 'sync'):
         # FIXME: This should be closed, but sometimes shelve
         # just won't do it. The best way is to timeout the try,
         # but not closing and just deleting will do the same
         # thing, just in a more yucky way
         printdbg("Syncing shelf")
         cache.sync()
         printdbg("Closing shelf")
         cache.close()
         printdbg("Deleting shelve " + self.shelve_file_name)
         os.remove(self.shelve_file_name)

     # Clean up cached adj in case this gets called without
     # update_all later
     self.__dict__['cached_adj'] = {}
예제 #4
0
    def __init__(self, repo, uri):
        """Read the repository log and keep the parsed line counts.

        After the LineCounter base initialiser has run, a CVSParser and a
        LogReader are both pointed at (repo, uri). If Config().repo_logfile
        is not None it is handed to the reader via set_logfile. The reader
        is then started with a callback that feeds each line to the parser,
        and the parser's get_added_removed_lines() result is stored in
        self.lines.
        """
        LineCounter.__init__(self, repo, uri)

        # Imported here rather than at module level, as in the original
        from pycvsanaly2.Config import Config
        from pycvsanaly2.CVSParser import CVSParser

        cvs_parser = CVSParser()
        cvs_parser.set_repository(repo, uri)

        log_reader = LogReader()
        log_reader.set_repo(repo, uri)

        configured_logfile = Config().repo_logfile
        if configured_logfile is not None:
            log_reader.set_logfile(configured_logfile)

        def feed_parser(line, parser):
            # Callback for the reader: forward one log line to the parser
            parser.feed(line)

        log_reader.start(feed_parser, cvs_parser)

        self.lines = cvs_parser.get_added_removed_lines()
예제 #5
0
                old_id = id
        cursor.close()
        cnn.close()
        profiler_stop("Update all file paths", delete=True)

if __name__ == '__main__':
    import sys
    from pycvsanaly2.Database import create_database
    from pycvsanaly2.Config import Config

    db = create_database('sqlite', sys.argv[1])
    cnn = db.connect()

    fp = FilePaths(db)

    config = Config()
    config.profile = True

    cursor = cnn.cursor()
    cursor.execute(
        "select s.id, file_id from scmlog s, actions a where s.id = a.commit_id"
    )
    old_id = -1
    for id, file_id in cursor.fetchall():
        if old_id != id:
            print "Commit ", id
            fp.update_for_revision(cursor, id, 1)
            old_id = id
        print fp.get_path(file_id, id, 1)

    cursor.close()
예제 #6
0
    def __get_path_from_db(self, file_id, commit_id):
        """Look up the stored path for file_id at commit_id.

        Runs self.__path_query__ on a fresh cursor from self.cnn with
        (file_id, commit_id) as parameters and returns the first column of
        the first row, prefixed with "/".

        The cursor is closed whether or not the lookup succeeds. If the
        query yields no row, fetchone() gives None and indexing it raises
        TypeError, which is propagated to the caller.
        """
        cursor = self.cnn.cursor()
        try:
            cursor.execute(statement(self.__path_query__,
                                     self.db.place_holder),
                           (file_id, commit_id))
            path = cursor.fetchone()[0]
        finally:
            # Do not leak the cursor when execute or the row access fails
            cursor.close()

        return "/" + path


if __name__ == '__main__':
    import sys
    from pycvsanaly2.Database import create_database
    from pycvsanaly2.Config import Config

    config = Config()
    config.load()
    db = create_database(config.db_driver, sys.argv[1], config.db_user,
                         config.db_password, config.db_hostname)
    cnn = db.connect()
    cursor = cnn.cursor()

    fr = FileRevs(db, cnn, cursor, 1)
    for revision, commit_id, file_id, action_type, composed in fr:
        print revision, commit_id, action_type, fr.get_path()

    cursor.close()
    cnn.close()
예제 #7
0
                                (self.path, self.rev, e.cmd, \
                                e.returncode, e.error))
            except Exception, e:
                failed = True
                printerr("Error obtaining %s@%s. Exception: %s", \
                        (self.path, self.rev, str(e)))

        self.repo.remove_watch(LS, wid)

        if failed:
            printerr("Failure due to error")
        else:
            try:
                self.ls_lines = io.getvalue().splitlines()

                if Config().count_types:
                    self.ls_lines = [
                        fp for fp in self.ls_lines
                        if guess_file_type(fp) in Config().count_types
                    ]
            except Exception, e:
                printerr("Error getting ls-lines." + "Exception: %s",
                         (str(e), ))
            finally:
                io.close()

    def _get_ls_line_count(self):
        """Return the number of entries currently held in self.ls_lines."""
        listing = self.ls_lines
        return len(listing)

    # Read-only attribute access to the count computed above
    ls_line_count = property(fget=_get_ls_line_count)
예제 #8
0
                        relative_path = file_link[1]
                        break
                    else:
                        file_link = cursor.fetchone()
            except CommandError as e:
                printerr(str(e) + '\n' + e.error)

        cursor.close()
        if relative_path is None:
            return None
        else:
            return relative_path.strip("/")

if __name__ == '__main__':
    import sys
    from pycvsanaly2.Database import create_database
    from pycvsanaly2.Config import Config

    config = Config()
    config.load()
    db = create_database(config.db_driver, sys.argv[1], config.db_user, config.db_password, config.db_hostname)
    cnn = db.connect()
    cursor = cnn.cursor()

    fr = FileRevs(db, cnn, cursor, 1)
    for revision, commit_id, file_id, action_type, composed in fr:
        print revision, commit_id, action_type, fr.get_path()

    cursor.close()
    cnn.close()
예제 #9
0
if __name__ == '__main__':
    import sys
    sys.path.insert(0, "../../")

from pycvsanaly2.Database import statement
from pycvsanaly2.utils import printdbg
from pycvsanaly2.profile import profiler_start, profiler_stop
from pycvsanaly2.Config import Config
from copy import deepcopy
import shelve
import os
from time import time


config = Config()


class Adj(object):
    """Container holding a ``files`` dict and an ``adj`` dict.

    Both attributes start out as fresh, empty dictionaries on every new
    instance.
    """

    def __init__(self):
        # Assigned left to right: files first, then adj
        self.files, self.adj = {}, {}


class FilePaths(object):
    # State dictionary declared once on the class rather than per instance.
    # NOTE(review): presumably bound to every instance's __dict__ so that
    # all FilePaths objects share one state (Borg-style) - the binding is
    # not visible in this view, confirm against __init__.
    #
    # Initial values: everything is None except 'cached_adj', which starts
    # as an empty dict. Elsewhere in this file 'cached_adj' is looked up by
    # str(commit id) and 'db' is used to open connections; the meaning of
    # 'rev', 'adj' and 'files' is not established by the lines shown here.
    __shared_state = {'rev': None,
                      'adj': None,
                      'files': None,
                      'cached_adj': {},
                      'db': None}
예제 #10
0
    def fixes_bug(self, commit_message):
        """Report whether a commit message matches a bug-fix pattern.

        The message is first tried against Config().bug_fix_regexes with
        case ignored, then against Config().bug_fix_regexes_case_sensitive
        with case respected. True is returned as soon as either attempt
        matches, False otherwise.

        # This is set in the config. Uncomment if you wish to try out
        # specific regexes
        #>>> Config().bug_fix_regexes = ["defect(s)?", "patch(ing|es|ed)?", \
                "bug(s|fix(es)?)?", "debug(ged)?", "fix(es|ed)?", "\#\d+"]
        #>>> Config().bug_fix_regexes_case_sensitive = ["[A-Z]+-\d+",]
        >>> b = BugFixMessage()

        # Easy ones
        >>> b.fixes_bug("Bug")
        True
        >>> b.fixes_bug("Bugs")
        True
        >>> b.fixes_bug("Fix")
        True
        >>> b.fixes_bug("Fixed")
        True
        >>> b.fixes_bug("Defect")
        True
        >>> b.fixes_bug("Defects")
        True
        >>> b.fixes_bug("Patches")
        True
        >>> b.fixes_bug("Patching")
        True

        # Embeds in sentences
        >>> b.fixes_bug("Fixed a bug")
        True
        >>> b.fixes_bug("Debugged this one")
        True
        >>> b.fixes_bug("Found a hole, which I patched, shouldn't be problem")
        True
        >>> b.fixes_bug("Put in a couple of fixes in x.java")
        True
        >>> b.fixes_bug("Implemented a bugfix")
        True
        >>> b.fixes_bug("References #1234")
        True
        >>> b.fixes_bug("Defect X is no more")
        True
        >>> b.fixes_bug("Closes JENKINS-1234")
        True

        # Embeds in long commit messages
        >>> b.fixes_bug("This was tough. Fixed now.")
        True
        >>> b.fixes_bug("Found X; debugged and solved.")
        True

        # Regression tests from Apache
        # When adding these, keep weird punctuation intact.
        >>> b.fixes_bug("Fixups to build the whole shebang once again.")
        True
        >>> b.fixes_bug("Change some INFO messages to DEBUG messages.")
        True
        >>> b.fixes_bug("Put back PR#6347")
        True
        >>> b.fixes_bug("Typo fixage..")
        True
        >>> b.fixes_bug("another typo/fixup")
        True
        >>> b.fixes_bug("Refix the entity tag comparisons")
        True
        >>> b.fixes_bug("Closeout PR#721")
        True
        >>> b.fixes_bug("SECURITY: CVE-2010-0408 (cve.mitre.org)")
        True
        >>> b.fixes_bug("    debugged the require_one and require_all")
        True
        >>> b.fixes_bug("    various style fixups / general changes")
        True
        >>> b.fixes_bug("    Win32: Eliminate useless debug error message")
        True

        # Things that shouldn't match
        # Refactoring could go either way, depending on whether you think
        # renaming/refactoring is a "bug fix." Right now, we don't call that
        # a "bug"
        >>> b.fixes_bug("Added method print_debug()")
        False
        >>> b.fixes_bug("Altered debug_log()")
        False
        >>> b.fixes_bug("NETWORK_PATCH_FIX")
        False
        >>> b.fixes_bug("Rename ap_debug_assert() to AP_DEBUG_ASSERT()")
        False
        >>> b.fixes_bug("Use bread() etc instead of fread() for " + \
                        "reading/writing")
        False
        >>> b.fixes_bug("Refactored to look cleaner")
        False
        >>> b.fixes_bug("Rewrite this yucky file")
        False
        >>> b.fixes_bug("Edited this file on 2010-12-01")
        False
        >>> b.fixes_bug("This file pertains to the A80-154 spec")
        False
        >>> b.fixes_bug("This is for March-28")
        False
        """
        # "or" short-circuits, so the case-sensitive patterns are only
        # consulted when the case-insensitive ones found nothing.
        matched = (
            self.__match_string(Config().bug_fix_regexes,
                                re.DOTALL | re.IGNORECASE,
                                commit_message)
            or self.__match_string(Config().bug_fix_regexes_case_sensitive,
                                   re.DOTALL,
                                   commit_message)
        )
        return bool(matched)
예제 #11
0
파일: Content.py 프로젝트: apepper/cvsanaly
                    (repo.get_type()))
        except Exception, e:
            raise ExtensionRunError( \
                    "Error creating repository %s. Exception: %s" % \
                    (repo.get_uri(), str(e)))

        # Try to create a table for storing the content
        # TODO: Removed use case for choosing between all or just the HEAD,
        # should ideally put that back again. Just all for now is fine.
        try:
            self.__prepare_table(connection)
        except Exception as e:
            raise ExtensionRunError("Couldn't prepare table because " + \
                                    str(e))

        queuesize = Config().max_threads
        printdbg("Setting queuesize to " + str(queuesize))

        # This is where the threading stuff comes in, I expect
        job_pool = JobPool(repo, path or repo.get_uri(), queuesize=queuesize)

        # This filters files if they're not source files.
        # I'm pretty sure "unknown" is returning binary files too, but
        # these are implicitly left out when trying to convert to utf-8
        # after download. However, ignore them for now to speed things up
        query = "select f.id from file_types ft, files f " + \
                "where f.id = ft.file_id and " + \
                "ft.type in('code') and " + \
                "f.repository_id = ?"
        # "ft.type in('code', 'unknown') and " + \
        read_cursor.execute(statement(query, db.place_holder), (repo_id, ))