def test_hungarian(self):
    """Check the total cost of the assignment returned by lapacho.hungarian.

    Each case pairs a cost matrix (square or rectangular) with the total
    cost expected when summing the matrix cells selected by the returned
    (row, column) index pairs.
    """
    square_a = [[400, 150, 400],
                [400, 450, 600],
                [300, 225, 300]]
    rectangular_a = [[400, 150, 400, 1],
                     [400, 450, 600, 2],
                     [300, 225, 300, 3]]
    square_b = [[10, 10, 8],
                [9, 8, 1],
                [9, 7, 4]]
    rectangular_b = [[10, 10, 8, 11],
                     [9, 8, 1, 1],
                     [9, 7, 4, 10]]

    # (cost matrix, expected cost)
    cases = [
        (square_a, 850),
        (rectangular_a, 452),
        (square_b, 18),
        (rectangular_b, 15),
    ]

    for cost_matrix, expected_cost in cases:
        assignment = lapacho.hungarian(cost_matrix)
        # Add up the cost of every selected (row, column) cell.
        total_cost = sum(cost_matrix[row][col] for row, col in assignment)
        self.assertEqual(expected_cost, total_cost)
def build(repo, store, options):
    """Walk the repository history and record commits, bugs and line edits.

    Commits are walked from ``repo.head.target`` with ``GIT_SORT_REVERSE``.
    For every commit this function:

    * finds or creates its ``Commit`` row,
    * links every bug number yielded by ``test_regexp`` for the commit
      message,
    * finds or creates a ``Commit`` row for each parent and links it, and
    * diffs the parent tree against the commit tree and stores one ``Edit``
      per changed, deleted or added line of each ``.c``/``.h`` file.

    Within a hunk, a deleted and an added line are stored as one "changed"
    edit when ``hungarian`` pairs them and their ``levenshtein_distance`` is
    strictly between 0.0 and 0.4; all remaining deleted/added lines are
    stored as pure deletions/additions.

    Args:
        repo: Repository object; ``walk`` and ``head.target`` are used.
        store: Object store; ``find``, ``add``, ``flush`` and ``commit`` are
            used.
        options: Not used by this function.
    """
    source_suffixes = (".c", ".h")

    for c in repo.walk(repo.head.target, GIT_SORT_REVERSE):
        logger.info("commit %s", c.hex)

        # Store commit
        # NOTE(review): .decode() presumes hex is a byte string — confirm.
        commit_hex = c.hex.decode("utf-8")
        commit = store.find(Commit, Commit.hex == commit_hex).one()
        if commit is None:
            commit = store.add(Commit(commit_hex, c.commit_time))
        store.flush()

        # Check commit message
        for bug_no in test_regexp(c.message):
            bug = store.find(Bug, Bug.bug_no == bug_no).one()
            if not bug:
                bug = store.add(Bug(bug_no))
            bug.commits.add(commit)
            logger.debug("bug %d", bug_no)
        store.flush()

        # Check parents
        for p in c.parents:
            parent_hex = p.hex.decode("utf-8")
            parent = store.find(Commit, Commit.hex == parent_hex).one()
            if parent is None:
                parent = store.add(Commit(parent_hex, p.commit_time))
            commit.parents.add(parent)
            logger.debug("parent %s", p.hex)

            diff = p.tree.diff_to_tree(c.tree, GIT_DIFF_IGNORE_WHITESPACE_EOL)
            # A distinct name is used for the patch so the parent commit
            # bound by the enclosing loop is not shadowed.
            for patch in diff:
                if splitext(patch.old_file_path)[1] not in source_suffixes:
                    continue
                logger.debug("path %s", patch.old_file_path)

                old_path = patch.old_file_path.decode("utf-8")
                old_file = store.find(File, File.path == old_path).one()
                if old_file is None:
                    old_file = store.add(File(old_path))
                new_path = patch.new_file_path.decode("utf-8")
                new_file = store.find(File, File.path == new_path).one()
                if new_file is None:
                    new_file = store.add(File(new_path))
                store.flush()

                for h in patch.hunks:
                    # Each side keeps its unmarked lines so that positions
                    # are offsets within that side of the hunk.
                    old_data = [line for line in h.lines if line[0] != "+"]
                    new_data = [line for line in h.lines if line[0] != "-"]
                    logger.debug("hunk old:%d-%d, new:%d-%d",
                                 h.old_start, h.old_lines,
                                 h.new_start, h.new_lines)
                    # (text, offset within its side of the hunk)
                    deletions = [(line[1], pos)
                                 for pos, line in enumerate(old_data)
                                 if line[0] == "-"]
                    additions = [(line[1], pos)
                                 for pos, line in enumerate(new_data)
                                 if line[0] == "+"]

                    # Identify changed lines
                    changed_lines = []
                    if deletions and additions:
                        # One row per deletion, one column per addition.
                        # Presumably a normalised distance given the 0.4
                        # cutoff below — confirm against the helper.
                        matrix = [
                            [levenshtein_distance(old[0], new[0], True)
                             for new in additions]
                            for old in deletions
                        ]
                        changed_lines = [
                            (deletions[i][1], additions[j][1])
                            for i, j in hungarian(matrix)
                            if 0.0 < matrix[i][j] < 0.4
                        ]

                    # Built once per hunk instead of once per line.
                    changed_old = {x for x, _ in changed_lines}
                    changed_new = {y for _, y in changed_lines}

                    # Store all changed lines
                    for x, y in changed_lines:
                        store.add(Edit(commit, old_file, new_file,
                                       h.old_start + x, h.new_start + y))
                    # Store all deleted lines
                    for _, x in deletions:
                        if x not in changed_old:
                            store.add(Edit(commit, old_file, new_file,
                                           h.old_start + x, None))
                    # Store all added lines
                    for _, y in additions:
                        if y not in changed_new:
                            store.add(Edit(commit, old_file, new_file,
                                           None, h.new_start + y))
                    store.flush()

    # Save the changes
    store.commit()
def build(repo, store, options):
    """Walk the repository history and record commits, bugs and line edits.

    Commits are walked from ``repo.head.oid`` with ``GIT_SORT_REVERSE``.
    For every commit this function:

    * finds or creates its ``Commit`` row,
    * links every bug number yielded by ``test_regexp`` for the commit
      message,
    * finds or creates a ``Commit`` row for each parent and links it, and
    * diffs the parent tree against the commit tree and, for each hunk of a
      ``.c``/``.h`` file, stores one ``Edit`` per changed, deleted or added
      line.

    Within a hunk, a deleted and an added line are stored as one "changed"
    edit when ``hungarian`` pairs them and their ``levenshtein_distance`` is
    strictly between 0.0 and 0.4; all remaining deleted/added lines are
    stored as pure deletions/additions.

    Args:
        repo: Repository object; ``walk`` and ``head.oid`` are used.
        store: Object store; ``find``, ``add``, ``flush`` and ``commit`` are
            used.
        options: Not used by this function.
    """
    source_suffixes = ('.c', '.h')

    for c in repo.walk(repo.head.oid, GIT_SORT_REVERSE):
        logger.info('commit %s', c.hex)

        # Store commit
        # NOTE(review): .decode() presumes hex is a byte string — confirm.
        commit_hex = c.hex.decode('utf-8')
        commit = store.find(Commit, Commit.hex == commit_hex).one()
        if commit is None:
            commit = store.add(Commit(commit_hex, c.commit_time))
        store.flush()

        # Check commit message
        for bug_no in test_regexp(c.message):
            bug = store.find(Bug, Bug.bug_no == bug_no).one()
            if not bug:
                bug = store.add(Bug(bug_no))
            bug.commits.add(commit)
            logger.debug('bug %d', bug_no)
        store.flush()

        # Check parents
        for p in c.parents:
            parent_hex = p.hex.decode('utf-8')
            parent = store.find(Commit, Commit.hex == parent_hex).one()
            if parent is None:
                parent = store.add(Commit(parent_hex, p.commit_time))
            commit.parents.add(parent)
            logger.debug('parent %s', p.hex)

            diff = p.tree.diff(c.tree)
            if 'hunks' not in diff.changes:
                continue

            for h in diff.changes['hunks']:
                if splitext(h.old_file)[1] not in source_suffixes:
                    continue

                old_path = h.old_file.decode('utf-8')
                old_file = store.find(File, File.path == old_path).one()
                if old_file is None:
                    old_file = store.add(File(old_path))
                new_path = h.new_file.decode('utf-8')
                new_file = store.find(File, File.path == new_path).one()
                if new_file is None:
                    new_file = store.add(File(new_path))
                store.flush()

                # Each side keeps its unmarked lines so that positions are
                # offsets within that side of the hunk.
                old_data = [line for line in h.data
                            if line[1] != GIT_DIFF_LINE_ADDITION]
                new_data = [line for line in h.data
                            if line[1] != GIT_DIFF_LINE_DELETION]
                logger.debug('hunk old:%d-%d, new:%d-%d',
                             h.old_start, h.old_lines,
                             h.new_start, h.new_lines)
                # (text, offset within its side of the hunk)
                deletions = [(line[0], pos)
                             for pos, line in enumerate(old_data)
                             if line[1] == GIT_DIFF_LINE_DELETION]
                additions = [(line[0], pos)
                             for pos, line in enumerate(new_data)
                             if line[1] == GIT_DIFF_LINE_ADDITION]

                # Identify changed lines
                changed_lines = []
                if deletions and additions:
                    # One row per deletion, one column per addition.
                    # Presumably a normalised distance given the 0.4 cutoff
                    # below — confirm against the helper.
                    matrix = [
                        [levenshtein_distance(old[0], new[0], True)
                         for new in additions]
                        for old in deletions
                    ]
                    changed_lines = [
                        (deletions[i][1], additions[j][1])
                        for i, j in hungarian(matrix)
                        if 0.0 < matrix[i][j] < 0.4
                    ]

                # Built once per hunk instead of once per line.
                changed_old = {x for x, _ in changed_lines}
                changed_new = {y for _, y in changed_lines}

                # Store all changed lines
                for x, y in changed_lines:
                    store.add(Edit(commit, old_file, new_file,
                                   h.old_start + x, h.new_start + y))
                # Store all deleted lines
                for _, x in deletions:
                    if x not in changed_old:
                        store.add(Edit(commit, old_file, new_file,
                                       h.old_start + x, None))
                # Store all added lines
                for _, y in additions:
                    if y not in changed_new:
                        store.add(Edit(commit, old_file, new_file,
                                       None, h.new_start + y))
                store.flush()

    # Save the changes
    store.commit()