Exemple #1
0
    def test_hungarian(self):
        """Check the total cost of the assignment returned by ``lapacho.hungarian``.

        Each case pairs a cost matrix with the expected total cost of the
        assignment.  The rectangular matrices are the preceding square
        matrices with one extra column appended.
        """
        square_a = [[400, 150, 400],
                    [400, 450, 600],
                    [300, 225, 300]]
        rectangular_a = [[400, 150, 400, 1],
                         [400, 450, 600, 2],
                         [300, 225, 300, 3]]
        square_b = [[10, 10, 8],
                    [9, 8, 1],
                    [9, 7, 4]]
        rectangular_b = [[10, 10, 8, 11],
                         [9, 8, 1, 1],
                         [9, 7, 4, 10]]

        # (cost matrix, expected cost of the returned assignment)
        cases = (
            (square_a, 850),
            (rectangular_a, 452),
            (square_b, 18),
            (rectangular_b, 15),
        )

        for cost_matrix, expected_cost in cases:
            assignment = lapacho.hungarian(cost_matrix)

            # Add up the cost of every (row, column) pair that was selected.
            total_cost = sum(cost_matrix[row][col] for row, col in assignment)

            self.assertEqual(expected_cost, total_cost)
Exemple #2
0
def _get_or_add(store, model, condition, *ctor_args):
    """Return the ``model`` object matching ``condition``, adding one if absent.

    When ``store.find(model, condition).one()`` yields ``None`` a new object
    is built as ``model(*ctor_args)`` and handed to ``store.add``.
    """
    found = store.find(model, condition).one()
    if found is None:
        found = store.add(model(*ctor_args))
    return found


def _match_changed_lines(deletions, additions):
    """Pair deleted lines with the added lines that appear to replace them.

    ``deletions`` and ``additions`` are lists of ``(text, offset)`` tuples.
    A distance matrix (one row per deletion, one column per addition) is
    handed to ``hungarian``; an assigned pair is kept only when its distance
    lies strictly between 0.0 and 0.4.

    Returns a list of ``(deleted_offset, added_offset)`` tuples, empty when
    either input is empty.
    """
    if not deletions or not additions:
        return []

    # NOTE(review): the trailing ``True`` and the 0.4 cutoff suggest that
    # levenshtein_distance returns a normalized distance -- confirm.
    matrix = [
        [levenshtein_distance(old_text, new_text, True) for new_text, _ in additions]
        for old_text, _ in deletions
    ]

    return [
        (deletions[row][1], additions[col][1])
        for row, col in hungarian(matrix)
        if 0.0 < matrix[row][col] < 0.4
    ]


def _store_edits(store, commit, old_file, new_file, hunk, deletions, additions):
    """Add one ``Edit`` per changed, deleted and added line of ``hunk``.

    Changed lines carry both an old and a new line number; pure deletions
    pass ``None`` as the new line and pure additions pass ``None`` as the
    old line.
    """
    changed = _match_changed_lines(deletions, additions)

    # Changed lines
    for old_offset, new_offset in changed:
        store.add(Edit(commit, old_file, new_file,
                       hunk.old_start + old_offset, hunk.new_start + new_offset))

    # Build the lookup sets once instead of once per line.
    changed_old = {old_offset for old_offset, _ in changed}
    changed_new = {new_offset for _, new_offset in changed}

    # Deleted lines that were not matched to an addition
    for _, old_offset in deletions:
        if old_offset not in changed_old:
            store.add(Edit(commit, old_file, new_file, hunk.old_start + old_offset, None))

    # Added lines that were not matched to a deletion
    for _, new_offset in additions:
        if new_offset not in changed_new:
            store.add(Edit(commit, old_file, new_file, None, hunk.new_start + new_offset))


def build(repo, store, options):
    """Record commits, referenced bugs and per-line edits from ``repo`` in ``store``.

    For every commit yielded by ``repo.walk`` (starting at ``repo.head.target``
    with ``GIT_SORT_REVERSE``) this:

    * finds or adds the ``Commit``;
    * links it to a ``Bug`` for every number ``test_regexp`` yields from the
      commit message;
    * links it to each parent ``Commit`` and, for every patch between the
      parent tree and the commit tree whose old path ends in ``.c`` or ``.h``,
      stores ``Edit`` objects for the changed, deleted and added lines of
      each hunk.

    ``store.commit()`` is called once per walked commit.

    :param repo: repository object providing ``walk`` and ``head.target``.
    :param store: object store providing ``find``, ``add``, ``flush`` and
        ``commit``.
    :param options: accepted for interface compatibility; not used here.
    """
    source_extensions = (".c", ".h")

    for c in repo.walk(repo.head.target, GIT_SORT_REVERSE):
        logger.info("commit %s", c.hex)

        # Store commit
        commit_hex = c.hex.decode("utf-8")
        commit = _get_or_add(store, Commit, Commit.hex == commit_hex,
                             commit_hex, c.commit_time)
        store.flush()

        # Check commit message
        for bug_no in test_regexp(c.message):
            bug = _get_or_add(store, Bug, Bug.bug_no == bug_no, bug_no)
            bug.commits.add(commit)

            logger.debug("bug %d", bug_no)

            store.flush()

        # Check parents
        for parent_commit in c.parents:
            parent_hex = parent_commit.hex.decode("utf-8")
            parent = _get_or_add(store, Commit, Commit.hex == parent_hex,
                                 parent_hex, parent_commit.commit_time)
            commit.parents.add(parent)

            logger.debug("parent %s", parent_commit.hex)

            diff = parent_commit.tree.diff_to_tree(c.tree, GIT_DIFF_IGNORE_WHITESPACE_EOL)

            # The patch gets its own name so it no longer shadows the parent.
            for patch in diff:
                if splitext(patch.old_file_path)[1] not in source_extensions:
                    continue

                logger.debug("path %s", patch.old_file_path)

                old_path = patch.old_file_path.decode("utf-8")
                new_path = patch.new_file_path.decode("utf-8")
                old_file = _get_or_add(store, File, File.path == old_path, old_path)
                new_file = _get_or_add(store, File, File.path == new_path, new_path)
                store.flush()

                for hunk in patch.hunks:
                    # Each hunk line is indexed as (origin, text); dropping the
                    # other side's lines makes the enumerate() index an offset
                    # within the old / new side of the hunk.
                    old_side = [line for line in hunk.lines if line[0] != "+"]
                    new_side = [line for line in hunk.lines if line[0] != "-"]

                    logger.debug("hunk old:%d-%d, new:%d-%d",
                                 hunk.old_start, hunk.old_lines, hunk.new_start, hunk.new_lines)

                    deletions = [(line[1], offset)
                                 for offset, line in enumerate(old_side) if line[0] == "-"]
                    additions = [(line[1], offset)
                                 for offset, line in enumerate(new_side) if line[0] == "+"]

                    _store_edits(store, commit, old_file, new_file, hunk, deletions, additions)

                    store.flush()

        # Save the changes
        store.commit()
Exemple #3
0
def _get_or_add(store, model, condition, *ctor_args):
    """Return the ``model`` object matching ``condition``, adding one if absent.

    When ``store.find(model, condition).one()`` yields ``None`` a new object
    is built as ``model(*ctor_args)`` and handed to ``store.add``.
    """
    found = store.find(model, condition).one()
    if found is None:
        found = store.add(model(*ctor_args))
    return found


def _match_changed_lines(deletions, additions):
    """Pair deleted lines with the added lines that appear to replace them.

    ``deletions`` and ``additions`` are lists of ``(text, offset)`` tuples.
    A distance matrix (one row per deletion, one column per addition) is
    handed to ``hungarian``; an assigned pair is kept only when its distance
    lies strictly between 0.0 and 0.4.

    Returns a list of ``(deleted_offset, added_offset)`` tuples, empty when
    either input is empty.
    """
    if not deletions or not additions:
        return []

    # NOTE(review): the trailing ``True`` and the 0.4 cutoff suggest that
    # levenshtein_distance returns a normalized distance -- confirm.
    matrix = [
        [levenshtein_distance(old_text, new_text, True) for new_text, _ in additions]
        for old_text, _ in deletions
    ]

    return [
        (deletions[row][1], additions[col][1])
        for row, col in hungarian(matrix)
        if 0.0 < matrix[row][col] < 0.4
    ]


def _store_edits(store, commit, old_file, new_file, hunk, deletions, additions):
    """Add one ``Edit`` per changed, deleted and added line of ``hunk``.

    Changed lines carry both an old and a new line number; pure deletions
    pass ``None`` as the new line and pure additions pass ``None`` as the
    old line.
    """
    changed = _match_changed_lines(deletions, additions)

    # Changed lines
    for old_offset, new_offset in changed:
        store.add(Edit(commit, old_file, new_file,
                       hunk.old_start + old_offset, hunk.new_start + new_offset))

    # Build the lookup sets once instead of once per line.
    changed_old = {old_offset for old_offset, _ in changed}
    changed_new = {new_offset for _, new_offset in changed}

    # Deleted lines that were not matched to an addition
    for _, old_offset in deletions:
        if old_offset not in changed_old:
            store.add(Edit(commit, old_file, new_file, hunk.old_start + old_offset, None))

    # Added lines that were not matched to a deletion
    for _, new_offset in additions:
        if new_offset not in changed_new:
            store.add(Edit(commit, old_file, new_file, None, hunk.new_start + new_offset))


def build(repo, store, options):
    """Record commits, referenced bugs and per-line edits from ``repo`` in ``store``.

    For every commit yielded by ``repo.walk`` (starting at ``repo.head.oid``
    with ``GIT_SORT_REVERSE``) this:

    * finds or adds the ``Commit``;
    * links it to a ``Bug`` for every number ``test_regexp`` yields from the
      commit message;
    * links it to each parent ``Commit`` and, for every hunk in
      ``diff.changes['hunks']`` whose old file ends in ``.c`` or ``.h``,
      stores ``Edit`` objects for the changed, deleted and added lines.

    ``store.commit()`` is called once per walked commit.

    :param repo: repository object providing ``walk`` and ``head.oid``.
    :param store: object store providing ``find``, ``add``, ``flush`` and
        ``commit``.
    :param options: accepted for interface compatibility; not used here.
    """
    source_extensions = ('.c', '.h')

    for c in repo.walk(repo.head.oid, GIT_SORT_REVERSE):
        logger.info('commit %s', c.hex)

        # Store commit
        commit_hex = c.hex.decode('utf-8')
        commit = _get_or_add(store, Commit, Commit.hex == commit_hex,
                             commit_hex, c.commit_time)
        store.flush()

        # Check commit message
        for bug_no in test_regexp(c.message):
            bug = _get_or_add(store, Bug, Bug.bug_no == bug_no, bug_no)
            bug.commits.add(commit)

            logger.debug('bug %d', bug_no)

            store.flush()

        # Check parents
        for parent_commit in c.parents:
            parent_hex = parent_commit.hex.decode('utf-8')
            parent = _get_or_add(store, Commit, Commit.hex == parent_hex,
                                 parent_hex, parent_commit.commit_time)
            commit.parents.add(parent)

            logger.debug('parent %s', parent_commit.hex)

            diff = parent_commit.tree.diff(c.tree)
            if 'hunks' not in diff.changes:
                continue

            for hunk in diff.changes['hunks']:
                if splitext(hunk.old_file)[1] not in source_extensions:
                    continue

                old_path = hunk.old_file.decode('utf-8')
                new_path = hunk.new_file.decode('utf-8')
                old_file = _get_or_add(store, File, File.path == old_path, old_path)
                new_file = _get_or_add(store, File, File.path == new_path, new_path)
                store.flush()

                # Each entry of hunk.data is indexed as (text, line type);
                # dropping the other side's lines makes the enumerate() index
                # an offset within the old / new side of the hunk.
                old_side = [line for line in hunk.data if line[1] != GIT_DIFF_LINE_ADDITION]
                new_side = [line for line in hunk.data if line[1] != GIT_DIFF_LINE_DELETION]

                logger.debug('hunk old:%d-%d, new:%d-%d',
                             hunk.old_start, hunk.old_lines, hunk.new_start, hunk.new_lines)

                deletions = [(line[0], offset) for offset, line in enumerate(old_side)
                             if line[1] == GIT_DIFF_LINE_DELETION]
                additions = [(line[0], offset) for offset, line in enumerate(new_side)
                             if line[1] == GIT_DIFF_LINE_ADDITION]

                _store_edits(store, commit, old_file, new_file, hunk, deletions, additions)

                store.flush()

        # Save the changes
        store.commit()