Ejemplo n.º 1
0
    def getChangedTexts(self, commitobj):
        """Return the lower-cased text fragments touched by a commit.

        The result is cached on ``self.changedTexts``. When that cache is
        unset but ``self.changedTexts_data`` is set, the value comes from
        ``self._loadChangedTextFromBackingVar()`` instead.

        Args:
            commitobj: Commit to inspect; it is diffed against the revision
                named ``<commit>^``. May be None only when a cached or
                backing value already exists.

        Returns:
            A list of strings: every non-blank inserted or deleted fragment
            of each modified file, followed by the full contents of each
            file the diff reports as added, all lower-cased.

        Raises:
            Exception: if ``commitobj`` is None and nothing is cached.
        """
        if self.changedTexts is not None:
            return self.changedTexts
        if self.changedTexts_data is not None:
            return self._loadChangedTextFromBackingVar()
        if commitobj is None:
            raise Exception("NULL passed to getChangedTexts when local changedTexts was not set")

        differ = gdiff.diff_match_patch()
        # Compute the diff once and reuse it for every change type.
        changes = commitobj.diff(str(commitobj) + '^')
        fragments = []

        for change in changes.iter_change_type('M'):  # Changed
            left = change.a_blob.data_stream.read()
            right = change.b_blob.data_stream.read()
            diffs = differ.diff_main(left, right)
            if diffs:
                differ.diff_cleanupSemantic(diffs)

            # Op code 0 marks unchanged text; keep only real, non-blank edits.
            fragments.extend(text.lower() for op, text in diffs
                             if op != 0 and text.strip())

        for change in changes.iter_change_type('A'):  # Added
            fragments.append(change.b_blob.data_stream.read().lower())

        # Deleted ('D') and renamed ('R') entries are intentionally ignored.
        self.changedTexts = fragments
        return self.changedTexts
Ejemplo n.º 2
0
def getCommits(repo, startdate, enddate):
    """Fetch a git repository and collect its commits inside a date window.

    The repository is kept in a local clone under
    ``Config.fsdir + 'git-repos/'``; the clone is initialised with an
    ``origin`` remote on first use, then fetched and ``master`` is pulled.

    Args:
        repo: Repository record; ``repo.url`` is the remote URL.
        startdate: Lower bound, passed through ``unixToGitDateFormat`` to
            the ``since`` filter of the commit iterator.
        enddate: Upper bound compared against each commit's
            ``committed_date``; later commits are skipped.

    Returns:
        A list of ``GitCommit`` objects, one per non-root commit in range.
    """
    localfolder = urlToFolder(repo.url)

    repoloc = Config.fsdir + 'git-repos/' + localfolder + '/'
    if os.path.exists(repoloc):
        gitrepo = pygit.Repo(repoloc)
    else:
        os.makedirs(repoloc)
        gitrepo = pygit.Repo.init(repoloc)
        gitrepo.create_remote('origin', repo.url)

    gitrepo.remotes.origin.fetch()
    gitrepo.remotes.origin.pull('master')

    commits = []
    for m in gitrepo.iter_commits(since=unixToGitDateFormat(startdate)):
        if m.committed_date > enddate:
            continue

        # TODO BUG: We are unable to get a git diff of the first commit.
        #   http://osdir.com/ml/version-control.git/2005-05/msg01309.html
        #   http://git.661346.n2.nabble.com/git-diff-tree-against-the-root-commit-td5685272.html
        if not m.parents:
            continue

        # A separate name keeps the repository handle from being rebound.
        commit = GitCommit()
        alldiffs = commit.getChangedTexts(m)
        commit.loadFromSource(repo, m.message, m.committed_date,
                              m.stats.files.keys(), str(m), alldiffs)
        commits.append(commit)
    return commits
Ejemplo n.º 3
0
    def getDiffsArray(self):
        """Return one diff per file between the previous and this revision.

        For every path in ``self.files`` the file content is fetched at
        revision ``int(self.uniqueid) - 1`` and at ``int(self.uniqueid)``
        and the two are compared with ``diff_main``.

        Returns:
            A list with one ``diff_main`` result per file, or an empty list
            when ``self.diffIsReallyBig`` is set.
        """
        if self.diffIsReallyBig:
            return []

        differ = gdiff.diff_match_patch()
        client = pysvn.Client()

        def cat_or_empty(loc, offset):
            # Best effort: a failed fetch (e.g. the file was added or deleted
            # in this revision) counts as empty content. Catching Exception
            # instead of everything lets KeyboardInterrupt/SystemExit through.
            try:
                revision = pysvn.Revision(pysvn.opt_revision_kind.number,
                                          int(self.uniqueid) + offset)
                return client.cat(url_or_path=loc, revision=revision)
            except Exception:
                return ''

        alldiffs = []
        for f in self.files:
            loc = (self.repo.url + f).replace("trunk//trunk", "trunk/")
            left = cat_or_empty(loc, -1)
            right = cat_or_empty(loc, 0)
            alldiffs.append(differ.diff_main(left, right))

        return alldiffs
Ejemplo n.º 4
0
    def getChangedTexts(self, commitobj):
        """Collect the lower-cased text fragments changed by a commit.

        A previously computed list on ``self.changedTexts`` is returned
        as-is. Otherwise, if ``self.changedTexts_data`` is set, the result
        of ``self._loadChangedTextFromBackingVar()`` is returned.

        Args:
            commitobj: Commit to inspect; it is diffed against the revision
                named ``<commit>^``. May be None only when a cached or
                backing value already exists.

        Returns:
            A list of strings: the non-blank inserted or deleted fragments
            of each modified file, then the whole content of each file the
            diff reports as added, all lower-cased.

        Raises:
            Exception: if ``commitobj`` is None and nothing is cached.
        """
        if self.changedTexts is not None:
            return self.changedTexts
        if self.changedTexts_data is not None:
            return self._loadChangedTextFromBackingVar()
        if commitobj is None:
            raise Exception(
                "NULL passed to getChangedTexts when local changedTexts was not set"
            )

        differ = gdiff.diff_match_patch()
        # One diff serves both change types; it used to be computed twice.
        changes = commitobj.diff(str(commitobj) + '^')
        collected = []

        for modified in changes.iter_change_type('M'):  # Changed
            left = modified.a_blob.data_stream.read()
            right = modified.b_blob.data_stream.read()
            diffs = differ.diff_main(left, right)
            if diffs:
                differ.diff_cleanupSemantic(diffs)

            for op, text in diffs:
                # Op code 0 is unchanged text; blank fragments are dropped.
                if op != 0 and text.strip():
                    collected.append(text.lower())

        for added in changes.iter_change_type('A'):  # Added
            collected.append(added.b_blob.data_stream.read().lower())

        # Deleted ('D') and renamed ('R') entries are intentionally ignored.
        self.changedTexts = collected
        return self.changedTexts
Ejemplo n.º 5
0
def getCommits(repo, startdate, enddate):
    """Update the local clone of ``repo`` and return its commits in range.

    The clone lives under ``Config.fsdir + 'git-repos/'``. It is created
    with an ``origin`` remote if the folder does not exist yet; afterwards
    ``origin`` is fetched and ``master`` is pulled.

    Args:
        repo: Repository record; ``repo.url`` is the remote URL.
        startdate: Lower bound, converted by ``unixToGitDateFormat`` and
            used as the ``since`` filter of the commit iterator.
        enddate: Upper bound; commits whose ``committed_date`` is greater
            are skipped.

    Returns:
        A list of ``GitCommit`` objects, one per non-root commit in range.
    """
    localfolder = urlToFolder(repo.url)

    repoloc = Config.fsdir + 'git-repos/' + localfolder + '/'
    if os.path.exists(repoloc):
        gitrepo = pygit.Repo(repoloc)
    else:
        os.makedirs(repoloc)
        gitrepo = pygit.Repo.init(repoloc)
        gitrepo.create_remote('origin', repo.url)

    origin = gitrepo.remotes.origin
    origin.fetch()
    origin.pull('master')

    commits = []
    for m in gitrepo.iter_commits(since=unixToGitDateFormat(startdate)):
        if m.committed_date > enddate:
            continue

        # TODO BUG: We are unable to get a git diff of the first commit.
        #   http://osdir.com/ml/version-control.git/2005-05/msg01309.html
        #   http://git.661346.n2.nabble.com/git-diff-tree-against-the-root-commit-td5685272.html
        if not m.parents:
            continue

        # Use a dedicated name so the repository handle is never rebound.
        commit = GitCommit()
        alldiffs = commit.getChangedTexts(m)
        commit.loadFromSource(repo, m.message, m.committed_date,
                              m.stats.files.keys(), str(m), alldiffs)
        commits.append(commit)
    return commits
Ejemplo n.º 6
0
 def getPrettyDiffs(self):
     """Yield an HTML rendering for each diff from ``self.getDiffsArray()``.

     Each diff is cleaned with ``diff_cleanupSemantic`` and rendered with
     ``diff_prettyHtml`` before being yielded.
     """
     raw_diffs = self.getDiffsArray()
     differ = gdiff.diff_match_patch()

     for diff in raw_diffs:
         differ.diff_cleanupSemantic(diff)
         html = differ.diff_prettyHtml(diff)
         # NOTE(review): unicode input is encoded to UTF-8 bytes while any
         # other input is decoded to unicode, so the yielded type flips with
         # the input type - confirm this is what callers expect.
         if isinstance(html, unicode):
             html = html.encode('utf-8')
         else:
             html = unicode(html, 'utf-8')
         yield html
Ejemplo n.º 7
0
    def run(self, revisionTextA, revisionTextB, revisionDict):
        """Compute word statistics for the change between two revision texts.

        Args:
            revisionTextA: Text of the earlier revision.
            revisionTextB: Text of the later revision.
            revisionDict: Keyword arguments used to build the ``Revision``.

        Returns:
            ``revision.to_dict()`` after ``wordsAdded``, ``wordsDeleted`` and
            ``wordCount`` (of ``revisionTextB``) have been set.
        """
        differ = diff_match_patch()
        revisionDiffs = differ.diff_main(revisionTextA, revisionTextB, False)
        differ.diff_cleanupSemantic(revisionDiffs)

        # Build real lists: the counts are read twice below, and on Python 3
        # a lazy map()/filter() iterator would be exhausted after the first
        # pass, leaving the deleted-word count with nothing to read.
        changedDiffs = [d for d in revisionDiffs if self.isRemoveOrAdd(d)]
        diffWordCount = [self.countWords(d) for d in changedDiffs]
        addedWordCount = self.getAddWordCount(diffWordCount)
        deletedWordCount = self.getDeletedWordCount(diffWordCount)

        revision = Revision(**revisionDict)
        revision.wordsAdded = addedWordCount
        revision.wordsDeleted = deletedWordCount
        revision.wordCount = self.getWordCount(revisionTextB)
        return revision.to_dict()
Ejemplo n.º 8
0
    def getPrettyDiffs(self, htmlize=True):
        """Yield each diff from ``self.getDiffsArray()`` as ``Markup`` HTML.

        Args:
            htmlize: Must be true; plain-text output is not implemented.

        Yields:
            ``Markup(html)`` for every diff, where ``html`` is the
            ``diff_prettyHtml`` rendering as unicode text.

        Raises:
            Exception: if ``htmlize`` is false (raised when iteration
                starts, because this is a generator).
        """
        if not htmlize:
            raise Exception("Do not know how to not htmlize prettyDiffs")

        diffs = self.getDiffsArray()
        differ = gdiff.diff_match_patch()

        for diff in diffs:
            differ.diff_cleanupSemantic(diff)
            html = differ.diff_prettyHtml(diff)
            # Always hand Markup unicode text. The old code encoded unicode
            # to UTF-8 bytes here, which Markup then had to decode implicitly
            # and which fails for non-ASCII content on Python 2.
            if not isinstance(html, unicode):
                html = unicode(html, 'utf-8')
            yield Markup(html)
Ejemplo n.º 9
0
    def getPrettyDiffs(self, htmlize=True):
        """Generate ``Markup`` HTML for every diff in ``self.getDiffsArray()``.

        Args:
            htmlize: Must be true; there is no non-HTML rendering.

        Yields:
            ``Markup(html)`` per diff, with ``html`` being the
            ``diff_prettyHtml`` output as unicode text.

        Raises:
            Exception: if ``htmlize`` is false. As this is a generator, the
                error surfaces on the first iteration step.
        """
        if not htmlize:
            raise Exception("Do not know how to not htmlize prettyDiffs")

        diffs = self.getDiffsArray()
        differ = gdiff.diff_match_patch()

        for diff in diffs:
            differ.diff_cleanupSemantic(diff)
            rendered = differ.diff_prettyHtml(diff)
            # Normalise to unicode only. Encoding unicode back to UTF-8 bytes
            # (the previous else branch) made Markup decode them implicitly,
            # which breaks on non-ASCII text under Python 2.
            if not isinstance(rendered, unicode):
                rendered = unicode(rendered, 'utf-8')
            yield Markup(rendered)
Ejemplo n.º 10
0
  def run(self, revisionTextA, revisionTextB, revisionDict):
    """Build a revision dict with added/deleted/total word counts.

    Args:
      revisionTextA: Text of the earlier revision.
      revisionTextB: Text of the later revision.
      revisionDict: Keyword arguments used to build the ``Revision``.

    Returns:
      ``revision.to_dict()`` after ``wordsAdded``, ``wordsDeleted`` and
      ``wordCount`` (of ``revisionTextB``) have been set.
    """
    differ = diff_match_patch()
    revisionDiffs = differ.diff_main(revisionTextA, revisionTextB, False)
    differ.diff_cleanupSemantic(revisionDiffs)

    # Materialise the counts as a list: they are consumed twice below, and a
    # lazy Python 3 map()/filter() iterator would be empty on the second pass.
    diffWordCount = [self.countWords(d) for d in revisionDiffs
                     if self.isRemoveOrAdd(d)]
    addedWordCount = self.getAddWordCount(diffWordCount)
    deletedWordCount = self.getDeletedWordCount(diffWordCount)

    revision = Revision(**revisionDict)
    revision.wordsAdded = addedWordCount
    revision.wordsDeleted = deletedWordCount
    revision.wordCount = self.getWordCount(revisionTextB)
    return revision.to_dict()
Ejemplo n.º 11
0
    def getDiffsArray(self):
        """Return one ``diff_main`` result per modified or added file.

        The commit comes from ``self.getChangedTextMetadata()`` and is diffed
        against the revision named ``<commit>^``.

        Returns:
            A list of diffs: modified files are diffed blob against blob,
            files reported as added are diffed against the empty string.
            An empty list is returned when ``self.diffIsReallyBig`` is set.
        """
        if self.diffIsReallyBig:
            return []

        differ = gdiff.diff_match_patch()
        commit = self.getChangedTextMetadata()
        # Compute the diff once and reuse it for both change types.
        changes = commit.diff(str(commit) + '^')

        alldiffs = []
        for change in changes.iter_change_type('M'):  # Changed
            left = change.a_blob.data_stream.read()
            right = change.b_blob.data_stream.read()
            alldiffs.append(differ.diff_main(left, right))

        for change in changes.iter_change_type('A'):  # Added
            addition = change.b_blob.data_stream.read()
            alldiffs.append(differ.diff_main('', addition))

        return alldiffs
Ejemplo n.º 12
0
def getCommits(repo, startdate, enddate):
    """Fetch a git repository and collect its commits inside a date window.

    The local clone lives under ``git-repos/``; it is initialised with an
    ``origin`` remote on first use, then fetched and ``master`` is pulled.

    Args:
        repo: Repository record; ``repo.url`` is the remote URL.
        startdate: Lower bound, passed through ``unixToGitDateFormat`` to
            the ``since`` filter of the commit iterator.
        enddate: Upper bound compared against each commit's
            ``committed_date``; later commits are skipped.

    Returns:
        A list of ``Commit`` objects. Each is loaded with the non-blank
        insert/delete diff tuples of the files modified by that commit.
    """
    localfolder = urlToFolder(repo.url)
    differ = gdiff.diff_match_patch()

    repoloc = "git-repos/" + localfolder + "/"
    if os.path.exists(repoloc):
        gitrepo = pygit.Repo(repoloc)
    else:
        os.makedirs(repoloc)
        gitrepo = pygit.Repo.init(repoloc)
        gitrepo.create_remote("origin", repo.url)

    gitrepo.remotes.origin.fetch()
    gitrepo.remotes.origin.pull("master")

    commits = []
    for m in gitrepo.iter_commits(since=unixToGitDateFormat(startdate)):
        if m.committed_date > enddate:
            continue

        alldiffs = []
        # Diff each commit against its OWN first parent. "HEAD~1" compared
        # every commit with the same fixed revision. A root commit has no
        # parent, so it is recorded with no changed text.
        if m.parents:
            for change in m.diff(m.parents[0]).iter_change_type("M"):  # Changed
                left = change.a_blob.data_stream.read()
                right = change.b_blob.data_stream.read()
                diffs = differ.diff_main(left, right)
                if diffs:
                    differ.diff_cleanupSemantic(diffs)

                # Op code 0 marks unchanged text; keep non-blank edits only.
                alldiffs.extend(d for d in diffs if d[0] != 0 and d[1].strip())

        # Added, deleted and renamed files are not processed.

        commit = Commit()
        commit.loadFromSource(repo, m.message, m.committed_date,
                              m.stats.files.keys(), str(m), alldiffs)
        commits.append(commit)
    return commits
Ejemplo n.º 13
0
def getCommits(repo, startdate, enddate):
    """Update the local clone of ``repo`` and return its commits in range.

    The clone is kept under ``git-repos/``. It is created with an
    ``origin`` remote if the folder does not exist yet; afterwards
    ``origin`` is fetched and ``master`` is pulled.

    Args:
        repo: Repository record; ``repo.url`` is the remote URL.
        startdate: Lower bound, converted by ``unixToGitDateFormat`` and
            used as the ``since`` filter of the commit iterator.
        enddate: Upper bound; commits whose ``committed_date`` is greater
            are skipped.

    Returns:
        A list of ``Commit`` objects. Each is loaded with the non-blank
        insert/delete diff tuples of the files modified by that commit.
    """
    localfolder = urlToFolder(repo.url)
    differ = gdiff.diff_match_patch()

    repoloc = 'git-repos/' + localfolder + '/'
    if os.path.exists(repoloc):
        gitrepo = pygit.Repo(repoloc)
    else:
        os.makedirs(repoloc)
        gitrepo = pygit.Repo.init(repoloc)
        gitrepo.create_remote('origin', repo.url)

    origin = gitrepo.remotes.origin
    origin.fetch()
    origin.pull('master')

    commits = []
    for m in gitrepo.iter_commits(since=unixToGitDateFormat(startdate)):
        if m.committed_date > enddate:
            continue

        alldiffs = []
        # Compare against the commit's own first parent; 'HEAD~1' diffed
        # every commit with the same fixed revision. Root commits have no
        # parent and are recorded without changed text.
        parent = m.parents[0] if m.parents else None
        if parent is not None:
            for modified in m.diff(parent).iter_change_type('M'):  #Changed
                left = modified.a_blob.data_stream.read()
                right = modified.b_blob.data_stream.read()
                diffs = differ.diff_main(left, right)
                if diffs:
                    differ.diff_cleanupSemantic(diffs)

                for entry in diffs:
                    # Op code 0 is unchanged text; blank edits are dropped.
                    if entry[0] != 0 and entry[1].strip():
                        alldiffs.append(entry)

        # Added, deleted and renamed files are not processed.

        commit = Commit()
        commit.loadFromSource(repo, m.message, m.committed_date,
                              m.stats.files.keys(), str(m), alldiffs)
        commits.append(commit)
    return commits
Ejemplo n.º 14
0
    def getDiffsArray(self):
        """Build the list of per-file diffs for this commit.

        The commit is obtained from ``self.getChangedTextMetadata()`` and
        diffed against the revision named ``<commit>^``.

        Returns:
            A list of ``diff_main`` results: one per modified file (blob
            against blob) and one per file reported as added (empty string
            against the blob). Empty when ``self.diffIsReallyBig`` is set.
        """
        if self.diffIsReallyBig:
            return []

        differ = gdiff.diff_match_patch()
        commit = self.getChangedTextMetadata()
        # One diff serves both change types; it used to be computed twice.
        changes = commit.diff(str(commit) + '^')

        alldiffs = [
            differ.diff_main(modified.a_blob.data_stream.read(),
                             modified.b_blob.data_stream.read())
            for modified in changes.iter_change_type('M')  #Changed
        ]
        alldiffs.extend(
            differ.diff_main('', added.b_blob.data_stream.read())
            for added in changes.iter_change_type('A')  #Added
        )
        return alldiffs
Ejemplo n.º 15
0
 def getDiffsArray(self):
     """Diff every file of this revision against the previous revision.

     Each path in ``self.files`` is fetched at revision
     ``int(self.uniqueid) - 1`` and at ``int(self.uniqueid)``; the two
     contents are compared with ``diff_main``.

     Returns:
         A list with one ``diff_main`` result per file, or an empty list
         when ``self.diffIsReallyBig`` is set.
     """
     if self.diffIsReallyBig:
         return []

     differ = gdiff.diff_match_patch()
     client = pysvn.Client()

     def fetch(loc, offset):
         # Try/except is easier than working out whether the change is an
         # addition or a deletion: a failed fetch just means empty content.
         # Exception (not a bare except) keeps Ctrl-C and SystemExit working.
         try:
             revision = pysvn.Revision(pysvn.opt_revision_kind.number,
                                       int(self.uniqueid) + offset)
             return client.cat(url_or_path=loc, revision=revision)
         except Exception:
             return ''

     alldiffs = []
     for f in self.files:
         loc = self.repo.url + f
         loc = loc.replace("trunk//trunk", "trunk/")
         before = fetch(loc, -1)
         after = fetch(loc, 0)
         alldiffs.append(differ.diff_main(before, after))

     return alldiffs