Example #1
def test_datadog_bindings():
    repo = FakeGHRepo()
    mq = MergeQueue(repo)
    from datadog_stats import Stats

    mq.stats = Stats(api_key="fdjkfdjkjkfd")
    assert type(mq.stats) == Stats
Example #2
def test_excommunicate_pr():
    mq = MergeQueue(FakeGHRepo())
    mq.ask_pr(12)
    mq.bless_pr(12)
    assert mq.queue[0].blessed
    mq.excommunicate_pr(12)
    assert not mq.queue[0].blessed

    with pytest.raises(MergeQueueException):
        mq.excommunicate_pr(13)
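Read together, these queue-manipulation tests pin down a small behavioural contract. Below is a minimal sketch of a queue that satisfies them; `QueuedPR`, `MergeQueueSketch` and `_index_of` are hypothetical names, and the real MergeQueue certainly carries more state (the GitHub repo handle, pulled PRs, review counters):

class MergeQueueException(Exception):
    pass


class QueuedPR:
    # hypothetical queue entry; the real class tracks much more PR state
    def __init__(self, nb):
        self.nb = nb
        self.blessed = False


class MergeQueueSketch:
    def __init__(self, repo):
        self.repo = repo
        self.queue = []

    def _index_of(self, nb):
        # unknown PR numbers raise, as test_excommunicate_pr and
        # test_simple_rmpr expect
        for i, pr in enumerate(self.queue):
            if pr.nb == nb:
                return i
        raise MergeQueueException(f'PR #{nb} is not in the queue.')

    def ask_pr(self, nb):
        # duplicates raise (test_askpr); the real implementation also
        # rejects closed PRs (test_askpr_closed)
        if any(pr.nb == nb for pr in self.queue):
            raise MergeQueueException(f'PR #{nb} is already queued.')
        self.queue.append(QueuedPR(nb))

    def bless_pr(self, nb):
        self.queue[self._index_of(nb)].blessed = True

    def excommunicate_pr(self, nb):
        self.queue[self._index_of(nb)].blessed = False

    def rm_pr(self, nb):
        del self.queue[self._index_of(nb)]

    def sink_pr(self, nb):
        # move the PR to the back of the queue (test_sink_pr)
        self.queue.append(self.queue.pop(self._index_of(nb)))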
Example #3
    def activate(self):
        super(Summit, self).activate()
        if not self.config:
            # i.e., if the plugin is not configured, it cannot activate.
            return

        if ROOMS not in self:
            self[ROOMS] = {}
        self.gh = Github(self.config['github-token'], api_preview=True)
        self.queues = {}  # one MergeQueue per room
        self.rooms_lock = RLock()
        try:
            self.gh_status = self.get_plugin('GHStatus')
        except Exception:
            self.log.info(
                "If you want PR notifications for your chat users, "
                "install the companion plugin err-ghstatus."
            )
            self.gh_status = None

        # Reload the state from the storage.
        with self.mutable(ROOMS) as rooms:
            for room_name, repo in rooms.items():
                self.queues[room_name] = MergeQueue(self.gh.get_repo(repo.name),
                                                    initial_queue=repo.queue)

        self.start_poller(120, method=self.check_pr_states)
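A sketch of what the polled check_pr_states callback plausibly does, given that `MergeQueue.check()` yields `(pr, transitions)` pairs in the tests below; the `self.send` call and the message format are assumptions:

    def check_pr_states(self):
        # assumed shape: walk every room's queue and announce transitions
        with self.rooms_lock:
            for room, mq in self.queues.items():
                for pr, transitions in mq.check():
                    for transition, params in transitions:
                        self.send(room,
                                  f'PR #{pr.nb}: {transition.name} ({params})')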
Example #4
    def short_pr_list(self, merge_queue: MergeQueue):
        """
        Build the short form list of PRs in a queue.
        """
        result = ''
        for i, pr in enumerate(merge_queue.get_queue()):
            mergeable = ':thumbsup:' if pr.mergeable and pr.mergeable_state == 'clean' else ':no_entry:'
            blessed = ':angel:' if pr.blessed else ''
            next_up = ':up:' if pr.nb in merge_queue.pulled_prs else ''
            result += f'{i + 1}. [#{pr.nb}]({pr.url}) {blessed} {next_up} {pr.user} merge: {mergeable} {pr.mergeable_state}'

            dependent_prs = merge_queue.count_dependent_prs(pr)
            if dependent_prs > 0:
                result += f' {dependent_prs} chained PRs.'
            result += '\n'
        return result
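For reference, one rendered queue entry from this method looks roughly like the following markdown line (made-up values; the emoji shortcodes are expanded by the chat backend):

1. [#42](https://github.com/org/repo/pull/42) :angel: :up: user1 merge: :thumbsup: clean 3 chained PRs.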
Example #5
    def long_pr_list(self,
                     merge_queue: MergeQueue,
                     pr_list=None,
                     level: int = 0,
                     with_desc: bool = False):
        """
        Build the long form list of PRs.
        """
        result = ''
        tab_size = 4
        indentation = ' ' * (tab_size * level)
        if pr_list is None:
            pr_list = merge_queue.get_queue()
        for i, pr in enumerate(pr_list):
            mergeable = ':thumbsup:' if pr.mergeable and pr.mergeable_state == 'clean' else ':no_entry:'
            blessed = ':angel:' if pr.blessed else ''
            next_up = ':up:' if pr.nb in merge_queue.pulled_prs else ''
            title = pr.title
            description = '\n\n'
            if with_desc:
                for line in pr.description.splitlines()[:5]:
                    description += f'    {indentation}| {line}\n\n'
            result += f'{indentation}{i+1}. [#{pr.nb}]({pr.url}) {blessed} {next_up} {pr.user} reviews: ' \
                      f'+:{pr.positive} -:{pr.negative} ~:{pr.pending} ' \
                      f'merge: {mergeable} {pr.mergeable_state} - {title}.{description}'

            if len(pr.dependents) > 0:
                result += f'{indentation}{merge_queue.count_dependent_prs(pr)} chained PRs for [#{pr.nb}]({pr.url})' \
                          f'\n\n{self.long_pr_list(merge_queue, pr.dependents, level=level + 1, with_desc=False)}'
        return result
Example #6
 def __init__(self, watershed=array([]), probabilities=array([]),
              merge_priority_function=None, allow_shared_boundaries=True,
              gt_vol=None, feature_manager=MomentsFeatureManager(),
              show_progress=False, lowmem=False, connectivity=1,
              channel_is_oriented=None, orientation_map=array([]),
              normalize_probabilities=False):
     """Create a graph from a watershed volume and image volume.
     
     The watershed is assumed to have dams of label 0 in between basins.
     Then, each basin corresponds to a node in the graph and an edge is
     placed between two nodes if there are one or more watershed pixels
     connected to both corresponding basins.
     """
     super(Rag, self).__init__(weighted=False)
     self.show_progress = show_progress
     if merge_priority_function is None:
         self.merge_priority_function = boundary_mean
     else:
         self.merge_priority_function = merge_priority_function
     self.set_watershed(watershed, lowmem, connectivity)
     self.set_probabilities(probabilities, normalize_probabilities)
     self.set_orientations(orientation_map, channel_is_oriented)
     if watershed is None:
         self.ucm = None
     else:
         self.ucm = array(self.watershed==0, dtype=float)
         self.ucm[self.ucm==0] = -inf
         self.ucm_r = self.ucm.ravel()
     self.max_merge_score = -inf
     self.build_graph_from_watershed(allow_shared_boundaries)
     self.set_feature_manager(feature_manager)
     self.set_ground_truth(gt_vol)
     self.merge_queue = MergeQueue()
Example #7
    def build_merge_queue(self):
        """Build a queue of node pairs to be merged in a specific priority.
        
        The queue elements have a specific format in order to allow 'removing'
        of specific elements inside the priority queue. Each element is a list
        of length 4 containing:
            - the merge priority (any ordered type)
            - a 'valid' flag
            - and the two nodes in arbitrary order
        The valid flag allows one to "remove" elements by setting the flag to
        False. Then one checks the flag when popping elements and ignores those
        marked as invalid.

        One other specific feature is that there are back-links from edges to
        their corresponding queue items so that when nodes are merged,
        affected edges can be invalidated and reinserted in the queue.
        """
        queue_items = []
        for l1, l2 in self.real_edges_iter():
            w = self.merge_priority_function(self, l1, l2)
            qitem = [w, True, l1, l2]
            queue_items.append(qitem)
            self[l1][l2]['qlink'] = qitem
            self[l1][l2]['weight'] = w
        return MergeQueue(queue_items, with_progress=self.show_progress)
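The "valid flag" trick in the docstring is standard lazy deletion for binary heaps, where removing an arbitrary element is expensive. A self-contained illustration of the same pattern with plain heapq (independent of the MergeQueue class used here):

import heapq

heap = []
qitem = [0.3, True, 'n1', 'n2']   # [priority, valid, node1, node2]
heapq.heappush(heap, qitem)

qitem[1] = False                  # "remove" by invalidating, O(1)

while heap:                       # pop, skipping invalidated entries
    priority, valid, u, v = heapq.heappop(heap)
    if valid:
        print('merge', u, v, 'at priority', priority)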
Example #8
def test_simple_rmpr():
    mq = MergeQueue(FakeGHRepo())
    mq.ask_pr(12)
    assert len(mq.queue) == 1

    mq.rm_pr(12)
    assert len(mq.queue) == 0

    with pytest.raises(MergeQueueException):
        mq.rm_pr(14)
Example #9
def test_sink_pr():
    mq = MergeQueue(FakeGHRepo())
    mq.ask_pr(12)
    mq.ask_pr(13)
    assert mq.queue[0].nb == 12
    mq.sink_pr(12)
    assert mq.queue[0].nb == 13
    assert mq.queue[1].nb == 12

    with pytest.raises(MergeQueueException):
        mq.sink_pr(14)
Example #10
 def __init__(self,
              watershed=array([]),
              probabilities=array([]),
              merge_priority_function=None,
              allow_shared_boundaries=True,
              gt_vol=None,
              feature_manager=MomentsFeatureManager(),
              show_progress=False,
              lowmem=False,
              connectivity=1,
              channel_is_oriented=None,
              orientation_map=array([]),
              normalize_probabilities=False):
     """Create a graph from a watershed volume and image volume.
     
     The watershed is assumed to have dams of label 0 in between basins.
     Then, each basin corresponds to a node in the graph and an edge is
     placed between two nodes if there are one or more watershed pixels
     connected to both corresponding basins.
     """
     super(Rag, self).__init__(weighted=False)
     self.show_progress = show_progress
     if merge_priority_function is None:
         self.merge_priority_function = boundary_mean
     else:
         self.merge_priority_function = merge_priority_function
     self.set_watershed(watershed, lowmem, connectivity)
     self.set_probabilities(probabilities, normalize_probabilities)
     self.set_orientations(orientation_map, channel_is_oriented)
     if watershed is None:
         self.ucm = None
     else:
         self.ucm = array(self.watershed == 0, dtype=float)
         self.ucm[self.ucm == 0] = -inf
         self.ucm_r = self.ucm.ravel()
     self.max_merge_score = -inf
     self.build_graph_from_watershed(allow_shared_boundaries)
     self.set_feature_manager(feature_manager)
     self.set_ground_truth(gt_vol)
     self.merge_queue = MergeQueue()
Example #11
def test_askpr():
    mq = MergeQueue(FakeGHRepo())
    mq.ask_pr(13)
    assert len(mq.queue) == 1
    assert mq.queue[0].nb == 13
    with pytest.raises(MergeQueueException):
        mq.ask_pr(13)
    assert len(mq.queue) == 1
Example #12
def test_check_merged_by_errbot():
    pr_14 = FakeGHPullRequest(14, reviews=[FakeGHReview('user1', APPROVED)])
    repo = FakeGHRepo(injected_prs=[pr_14])
    mq = MergeQueue(repo)
    mq.ask_pr(14)
    transitions = list(mq.check())
    assert len(transitions) == 0  # nothing changed so nothing should happen

    pr_14.mergeable_state = CLEAN
    pr_14.mergeable = True
    transitions = list(mq.check())
    assert len(transitions) == 1
    pr, [(transition, params)] = transitions[0]
    assert transition == PRTransition.NOW_MERGEABLE  # PR is not blessed so nothing else should happen

    mq.bless_pr(14)
    transitions = list(mq.check())
    assert len(transitions) == 1
    assert pr_14.asked_to_be_merged
    pr, [(transition, params)] = transitions[0]
    assert transition == PRTransition.MERGING
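This test constrains `check()` to behave as a snapshot diff: refetch each queued PR, compare with the last observed state, and yield `(pr, [(transition, params), ...])` only for PRs that changed; merging is only requested for blessed PRs. A hedged sketch of the mergeability branch (the `was_mergeable` and `merge_requested` bookkeeping attributes are assumptions):

def check(self):
    for pr in self.queue:
        transitions = []
        fresh = self.repo.get_pull(pr.nb)   # refetch from GitHub
        mergeable = fresh.mergeable and fresh.mergeable_state == CLEAN
        if mergeable and not pr.was_mergeable:
            transitions.append((PRTransition.NOW_MERGEABLE, None))
        if mergeable and pr.blessed and not pr.merge_requested:
            fresh.merge()                   # only blessed PRs get merged
            pr.merge_requested = True
            transitions.append((PRTransition.MERGING, None))
        pr.was_mergeable = mergeable
        if transitions:
            yield pr, transitions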
Example #13
def test_check_merged_externally():
    pr_14 = FakeGHPullRequest(14)
    repo = FakeGHRepo(injected_prs=[pr_14])
    mq = MergeQueue(repo)
    mq.ask_pr(14)
    transitions = list(mq.check())
    assert len(transitions) == 0  # nothing changed so nothing should happen

    pr_14.merged = True  # PR has been merged externally
    transitions = list(mq.check())
    assert len(transitions) == 1
    pr, [(transition, params)] = transitions[0]
    assert transition == PRTransition.MERGED
Example #14
def test_reviews():
    pr_1 = FakeGHPullRequest(1,
                             reviews=[FakeGHReview('user1', APPROVED)],
                             mergeable_state=CLEAN)
    repo = FakeGHRepo(injected_prs=[pr_1])
    mq = MergeQueue(repo, max_pulled_prs=2)

    mq.ask_pr(1)
    mq.bless_pr(1)

    transitions = list(mq.check())
    assert len(transitions) == 0

    pr_1.add_review(FakeGHReview('user1', COMMENTED))
    transitions = list(mq.check())
    assert len(transitions) == 0

    pr_1.add_review(FakeGHReview('user1', REQUEST_CHANGES))

    transitions = list(mq.check())
    assert len(transitions) == 1
    pr, review_transitions = transitions[0]
    assert review_transitions[0] == (PRTransition.GOT_POSITIVE, 0)
    assert review_transitions[1] == (PRTransition.GOT_NEGATIVE, 1)
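The review assertions imply the counting rule: COMMENTED reviews never change the tally, and only each reviewer's most recent APPROVED or REQUEST_CHANGES verdict counts, so user1's change request simultaneously drops the positive count to 0 and raises the negative count to 1 (the transition parameter looks like the new count). A sketch of that reduction:

def count_reviews(reviews):
    # keep each user's latest APPROVED / REQUEST_CHANGES verdict;
    # COMMENTED reviews never override an earlier verdict
    latest = {}
    for review in reviews:          # assumed chronological order
        if review.state in (APPROVED, REQUEST_CHANGES):
            latest[review.user] = review.state
    positive = sum(1 for s in latest.values() if s == APPROVED)
    negative = sum(1 for s in latest.values() if s == REQUEST_CHANGES)
    return positive, negative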
Example #15
    def merge_config(self, msg, args):
        """
        Configure the merge queue with repo for a room (defaults to the current
        room). Warning: this is killing the queue.
        """
        try:
            repo, room = self.optional_room_precheck(msg, args)
        except Exception as e:
            return f'Error {e}'

        gh_repo = self.gh.get_repo(repo)
        with self.rooms_lock:
            with self.mutable(ROOMS) as rooms:
                rooms[room] = Repo(name=repo,
                                   owner=msg.frm,
                                   queue=[],
                                   saints=[msg.frm.aclattr])
                self.queues[room] = MergeQueue(gh_repo)

        return f'Configured {room} with repo {gh_repo.name}'
Example #16
def test_check_review_counting():
    pr_14 = FakeGHPullRequest(14)
    repo = FakeGHRepo(injected_prs=[pr_14])
    mq = MergeQueue(repo)
    mq.ask_pr(14)
    transitions = list(mq.check())
    assert len(transitions) == 0  # nothing changed so nothing should happen

    pr_14.reviews.append(FakeGHReview('dugenou', APPROVED))

    transitions = list(mq.check())
    assert len(transitions) == 1
    pr, [(transition, params)] = transitions[0]
    assert transition == PRTransition.GOT_POSITIVE

    pr_14.reviews.append(FakeGHReview('dugland', CHANGES_REQUESTED))

    transitions = list(mq.check())
    assert len(transitions) == 1
    pr, [(transition, params)] = transitions[0]
    assert transition == PRTransition.GOT_NEGATIVE
Example #17
class Rag(Graph):
    """Region adjacency graph for segmentation of nD volumes."""
    def __init__(self,
                 watershed=array([]),
                 probabilities=array([]),
                 merge_priority_function=None,
                 allow_shared_boundaries=True,
                 gt_vol=None,
                 feature_manager=MomentsFeatureManager(),
                 show_progress=False,
                 lowmem=False,
                 connectivity=1,
                 channel_is_oriented=None,
                 orientation_map=array([]),
                 normalize_probabilities=False):
        """Create a graph from a watershed volume and image volume.
        
        The watershed is assumed to have dams of label 0 in between basins.
        Then, each basin corresponds to a node in the graph and an edge is
        placed between two nodes if there are one or more watershed pixels
        connected to both corresponding basins.
        """
        super(Rag, self).__init__(weighted=False)
        self.show_progress = show_progress
        if merge_priority_function is None:
            self.merge_priority_function = boundary_mean
        else:
            self.merge_priority_function = merge_priority_function
        self.set_watershed(watershed, lowmem, connectivity)
        self.set_probabilities(probabilities, normalize_probabilities)
        self.set_orientations(orientation_map, channel_is_oriented)
        if watershed is None:
            self.ucm = None
        else:
            self.ucm = array(self.watershed == 0, dtype=float)
            self.ucm[self.ucm == 0] = -inf
            self.ucm_r = self.ucm.ravel()
        self.max_merge_score = -inf
        self.build_graph_from_watershed(allow_shared_boundaries)
        self.set_feature_manager(feature_manager)
        self.set_ground_truth(gt_vol)
        self.merge_queue = MergeQueue()

    def __copy__(self):
        if sys.version_info[:2] < (2, 7):
            # Python versions prior to 2.7 don't handle deepcopy of function
            # objects well. Thus, keep a reference and remove from Rag object
            f = self.neighbor_idxs
            del self.neighbor_idxs
            F = self.feature_manager
            del self.feature_manager
        pr_shape = self.probabilities_r.shape
        g = super(Rag, self).copy()
        g.watershed_r = g.watershed.ravel()
        g.segmentation_r = g.segmentation.ravel()
        g.ucm_r = g.ucm.ravel()
        g.probabilities_r = g.probabilities.reshape(pr_shape)
        if sys.version_info[:2] < (2, 7):
            g.neighbor_idxs = f
            self.neighbor_idxs = f
            g.feature_manager = F
            self.feature_manager = F
        return g

    def copy(self):
        return self.__copy__()

    def real_edges(self, *args, **kwargs):
        return [
            e for e in super(Rag, self).edges(*args, **kwargs)
            if self.boundary_body not in e[:2]
        ]

    def real_edges_iter(self, *args, **kwargs):
        return (e for e in super(Rag, self).edges_iter(*args, **kwargs)
                if self.boundary_body not in e[:2])

    def build_graph_from_watershed(self,
                                   allow_shared_boundaries=True,
                                   idxs=None):
        if self.watershed.size == 0: return  # stop processing for empty graphs
        if not allow_shared_boundaries:
            self.ignored_boundary = zeros(self.watershed.shape, bool)
        if idxs is None:
            idxs = arange(self.watershed.size)
            self.add_node(
                self.boundary_body,
                extent=set(flatnonzero(self.watershed == self.boundary_body)))
        inner_idxs = idxs[self.watershed_r[idxs] != self.boundary_body]
        pbar = ip.StandardProgressBar() if self.show_progress \
                                        else ip.NoProgressBar()
        for idx in ip.with_progress(inner_idxs, title='Graph... ', pbar=pbar):
            ns = self.neighbor_idxs(idx)
            adj_labels = self.watershed_r[ns]
            adj_labels = unique(adj_labels)
            adj_labels = adj_labels[adj_labels.nonzero()]
            nodeid = self.watershed_r[idx]
            if nodeid != 0:
                adj_labels = adj_labels[adj_labels != nodeid]
                edges = zip(repeat(nodeid), adj_labels)
                if not self.has_node(nodeid):
                    self.add_node(nodeid, extent=set())
                try:
                    self.node[nodeid]['extent'].add(idx)
                except KeyError:
                    self.node[nodeid]['extent'] = set([idx])
            else:
                if len(adj_labels) == 0: continue
                if adj_labels[-1] != self.boundary_body:
                    edges = list(combinations(adj_labels, 2))
                else:
                    edges = list(product([self.boundary_body],
                                         adj_labels[:-1]))
            if allow_shared_boundaries or len(edges) == 1:
                for l1, l2 in edges:
                    if self.has_edge(l1, l2):
                        self[l1][l2]['boundary'].add(idx)
                    else:
                        self.add_edge(l1, l2, boundary=set([idx]))
            elif len(edges) > 1:
                self.ignored_boundary.ravel()[idx] = True

    def set_feature_manager(self, feature_manager):
        self.feature_manager = feature_manager
        if len(self.feature_manager) > 0:
            self.compute_feature_caches()

    def compute_feature_caches(self):
        for n in self.nodes_iter():
            self.node[n]['feature-cache'] = \
                            self.feature_manager.create_node_cache(self, n)
        for n1, n2 in self.edges_iter():
            self[n1][n2]['feature-cache'] = \
                            self.feature_manager.create_edge_cache(self, n1, n2)

    def get_neighbor_idxs_fast(self, idxs):
        return self.pixel_neighbors[idxs]

    def get_neighbor_idxs_lean(self, idxs, connectivity=1):
        return morpho.get_neighbor_idxs(self.watershed, idxs, connectivity)

    def set_probabilities(self, probs=array([]), normalize=False):
        if len(probs) == 0:
            self.probabilities = zeros_like(self.watershed)
            self.probabilities_r = self.probabilities.ravel()
        probs = probs.astype(double)
        if normalize and len(probs) > 1:
            probs -= probs.min()  # ensure probs.min() == 0
            probs /= probs.max()  # ensure probs.max() == 1
        sp = probs.shape
        sw = tuple(array(self.watershed.shape, dtype=int)-\
                    2*self.pad_thickness*ones(self.watershed.ndim, dtype=int))
        p_ndim = probs.ndim
        w_ndim = self.watershed.ndim
        padding = [inf] + (self.pad_thickness - 1) * [0]
        if p_ndim == w_ndim:
            self.probabilities = morpho.pad(probs, padding)
            self.probabilities_r = self.probabilities.ravel()[:, newaxis]
        elif p_ndim == w_ndim + 1:
            if sp[1:] == sw:
                # channel axis comes first; move it last
                probs = probs.transpose(list(range(1, p_ndim)) + [0])
            axes = range(p_ndim - 1)
            self.probabilities = morpho.pad(probs, padding, axes)
            self.probabilities_r = self.probabilities.reshape(
                (self.watershed.size, -1))

    def set_orientations(self, orientation_map, channel_is_oriented):
        if len(orientation_map) == 0:
            self.orientation_map = zeros_like(self.watershed)
            self.orientation_map_r = self.orientation_map.ravel()
        so = orientation_map.shape
        sw = tuple(array(self.watershed.shape, dtype=int)-\
                2*self.pad_thickness*ones(self.watershed.ndim, dtype=int))
        o_ndim = orientation_map.ndim
        w_ndim = self.watershed.ndim
        padding = [0] + (self.pad_thickness - 1) * [0]
        self.orientation_map = morpho.pad(orientation_map, padding).astype(int)
        self.orientation_map_r = self.orientation_map.ravel()
        if channel_is_oriented is None:
            nchannels = 1 if self.probabilities.ndim==self.watershed.ndim \
                else self.probabilities.shape[-1]
            self.channel_is_oriented = array([False] * nchannels)
            self.max_probabilities_r = zeros_like(self.probabilities_r)
            self.oriented_probabilities_r = zeros_like(self.probabilities_r)
            self.non_oriented_probabilities_r = self.probabilities_r
        else:
            self.channel_is_oriented = channel_is_oriented
            self.max_probabilities_r = \
                self.probabilities_r[:, self.channel_is_oriented].max(axis=1)
            self.oriented_probabilities_r = \
                self.probabilities_r[:, self.channel_is_oriented]
            self.oriented_probabilities_r = \
                self.oriented_probabilities_r[
                    range(len(self.oriented_probabilities_r)),
                    self.orientation_map_r]
            self.non_oriented_probabilities_r = \
                self.probabilities_r[:, ~self.channel_is_oriented]

    def set_watershed(self, ws=array([]), lowmem=False, connectivity=1):
        try:
            self.boundary_body = ws.max() + 1
        except ValueError:  # empty watershed given
            self.boundary_body = -1
        self.volume_size = ws.size
        self.has_zero_boundaries = (ws == 0).any()
        if self.has_zero_boundaries:
            self.watershed = morpho.pad(ws, [0, self.boundary_body])
        else:
            self.watershed = morpho.pad(ws, self.boundary_body)
        self.segmentation = self.watershed.copy()
        self.watershed_r = self.watershed.ravel()
        self.segmentation_r = self.segmentation.ravel()  # reduce fct calls
        self.pad_thickness = 2 if (self.segmentation == 0).any() else 1
        if lowmem:

            def neighbor_idxs(x):
                return self.get_neighbor_idxs_lean(x, connectivity)

            self.neighbor_idxs = neighbor_idxs
        else:
            self.pixel_neighbors = \
                morpho.build_neighbors_array(self.watershed, connectivity)
            self.neighbor_idxs = self.get_neighbor_idxs_fast

    def set_ground_truth(self, gt=None):
        if gt is not None:
            gtm = gt.max() + 1
            gt_ignore = [0, gtm] if (gt == 0).any() else [gtm]
            seg_ignore = [0, self.boundary_body] if \
                        (self.segmentation==0).any() else [self.boundary_body]
            self.gt = morpho.pad(gt, gt_ignore)
            self.rig = contingency_table(self.segmentation, self.gt)
            self.rig[:, gt_ignore] = 0
            self.rig[seg_ignore, :] = 0
        else:
            self.gt = None
            # null pattern to transparently allow merging of nodes.
            # Bonus feature: counts how many sp's went into a single node.
            try:
                self.rig = ones(self.watershed.max() + 1)
            except ValueError:
                self.rig = ones(self.number_of_nodes() + 1)

    def build_merge_queue(self):
        """Build a queue of node pairs to be merged in a specific priority.
        
        The queue elements have a specific format in order to allow 'removing'
        of specific elements inside the priority queue. Each element is a list
        of length 4 containing:
            - the merge priority (any ordered type)
            - a 'valid' flag
            - and the two nodes in arbitrary order
        The valid flag allows one to "remove" elements by setting the flag to
        False. Then one checks the flag when popping elements and ignores those
        marked as invalid.

        One other specific feature is that there are back-links from edges to
        their corresponding queue items so that when nodes are merged,
        affected edges can be invalidated and reinserted in the queue.
        """
        queue_items = []
        for l1, l2 in self.real_edges_iter():
            w = self.merge_priority_function(self, l1, l2)
            qitem = [w, True, l1, l2]
            queue_items.append(qitem)
            self[l1][l2]['qlink'] = qitem
            self[l1][l2]['weight'] = w
        return MergeQueue(queue_items, with_progress=self.show_progress)

    def rebuild_merge_queue(self):
        """Build a merge queue from scratch and assign to self.merge_queue."""
        self.merge_queue = self.build_merge_queue()

    def agglomerate(self, threshold=0.5, save_history=False):
        """Merge nodes sequentially until given edge confidence threshold."""
        if self.merge_queue.is_empty():
            self.merge_queue = self.build_merge_queue()
        history, scores, evaluation = [], [], []
        while len(self.merge_queue) > 0 and \
                                        self.merge_queue.peek()[0] < threshold:
            merge_priority, valid, n1, n2 = self.merge_queue.pop()
            if valid:
                self.update_ucm(n1, n2, merge_priority)
                self.merge_nodes(n1, n2)
                if save_history:
                    history.append((n1, n2))
                    scores.append(merge_priority)
                    evaluation.append(
                        (self.number_of_nodes() - 1, self.split_vi()))
        if save_history:
            return history, scores, evaluation

    def agglomerate_count(self, stepsize=100, save_history=False):
        """Agglomerate until 'stepsize' merges have been made."""
        if self.merge_queue.is_empty():
            self.merge_queue = self.build_merge_queue()
        history, evaluation = [], []
        i = 0
        while len(self.merge_queue) > 0 and i < stepsize:
            merge_priority, valid, n1, n2 = self.merge_queue.pop()
            if valid:
                i += 1
                self.update_ucm(n1, n2, merge_priority)
                self.merge_nodes(n1, n2)
                if save_history:
                    history.append((n1, n2))
                    evaluation.append(
                        (self.number_of_nodes() - 1, self.split_vi()))
        if save_history:
            return history, evaluation

    def agglomerate_ladder(self, threshold=1000, strictness=1):
        """Merge sequentially all nodes smaller than threshold.
        
        strictness = 1 only considers size of nodes
        strictness = 2 adds additional constraint: small nodes can only be 
        merged to large neighbors
        strictness = 3 additionally requires that the boundary between nodes
        be larger than 2 pixels
        Note: nodes that are on the volume boundary are not agglomerated.
        """
        original_merge_priority_function = self.merge_priority_function
        self.merge_priority_function = make_ladder(
            self.merge_priority_function, threshold, strictness)
        self.rebuild_merge_queue()
        self.agglomerate(inf)
        self.merge_priority_function = original_merge_priority_function
        self.merge_queue.finish()
        self.rebuild_merge_queue()

    def one_shot_agglomeration(self, threshold=0.5):
        g = self.copy()
        if len(g.merge_queue) == 0:
            g.rebuild_merge_queue()
        for u, v, d in g.edges(data=True):
            if g.boundary_body in [u, v] or d['weight'] > threshold:
                g.remove_edge(u, v)
        ccs = connected_components(g)
        for cc in ccs:
            g.merge_subgraph(cc)
        return g.get_segmentation()

    def assign_gt_to_ws(self, gt):
        ws_nopad = morpho.juicy_center(self.watershed, self.pad_thickness)
        bdrymap = morpho.pad(morpho.seg_to_bdry(ws_nopad),
                             [0] * self.pad_thickness)
        gt_bdrymap_nopad = morpho.seg_to_bdry(gt)
        gt_bdrymap = morpho.pad(gt_bdrymap_nopad, [0] * self.pad_thickness)
        k = distance_transform_cdt(1 - bdrymap, return_indices=True)
        ind = nonzero(gt_bdrymap.ravel())[0]
        closest_sub = numpy.concatenate([i.ravel()[:, newaxis] for i in k[1]],
                                        axis=1)
        closest_sub = closest_sub[ind, :]
        closest_ind = [
            dot(bdrymap.strides, i) / bdrymap.itemsize for i in closest_sub
        ]
        M = zeros_like(bdrymap).astype(float)
        M.ravel()[closest_ind] = 1.0
        bdrymap.ravel()[closest_ind] = False
        k = distance_transform_cdt(1 - bdrymap, return_indices=True)
        ind = nonzero(gt_bdrymap.ravel())[0]
        closest_sub = numpy.concatenate([i.ravel()[:, newaxis] for i in k[1]],
                                        axis=1)
        closest_sub = closest_sub[ind, :]
        closest_ind = [
            dot(bdrymap.strides, i) / bdrymap.itemsize for i in closest_sub
        ]
        M.ravel()[closest_ind] = 1.0
        return M

    def learn_agglomerate(self,
                          gts,
                          feature_map,
                          min_num_samples=1,
                          *args,
                          **kwargs):
        """Agglomerate while comparing to ground truth & classifying merges."""
        learn_flat = kwargs.get('learn_flat', True)
        learning_mode = kwargs.get('learning_mode', 'strict').lower()
        labeling_mode = kwargs.get('labeling_mode', 'assignment').lower()
        priority_mode = kwargs.get('priority_mode', 'random').lower()
        memory = kwargs.get('memory', True)
        unique = kwargs.get('unique', True)
        max_numepochs = kwargs.get('max_numepochs', 10)
        if priority_mode == 'mean' and unique:
            max_numepochs = 2 if learn_flat else 1
        if priority_mode in ['random', 'mean'] and not memory:
            max_numepochs = 1
        label_type_keys = {
            'assignment': 0,
            'vi-sign': 1,
            'rand-sign': 2,
            'boundary': 3
        }
        if type(gts) != list:
            gts = [gts]  # allow using single ground truth as input
        master_ctables = \
                [contingency_table(self.get_segmentation(), gt) for gt in gts]
        # Match the watershed to the ground truths
        ws_is_gt = zeros_like(self.watershed).astype(float)
        for gt in gts:
            ws_is_gt += self.assign_gt_to_ws(gt)
        ws_is_gt /= float(len(gts))
        ws_is_gt = ws_is_gt > 0.5
        alldata = []
        data = [[], [], [], []]
        for numepochs in range(max_numepochs):
            ctables = deepcopy(master_ctables)
            if len(data[0]) > min_num_samples:
                break
            if learn_flat and numepochs == 0:
                alldata.append(self.learn_flat(gts, feature_map, ws_is_gt))
                data = unique_learning_data_elements(alldata) if unique else \
                       alldata[-1]
                continue
            g = self.copy()
            if priority_mode == 'mean':
                g.merge_priority_function = boundary_mean
            elif (numepochs > 0 and priority_mode == 'active') or \
                    (numepochs % 2 == 1 and priority_mode == 'mixed'):
                cl = kwargs.get('classifier', RandomForest())
                cl = cl.fit(data[0], data[1][:,
                                             label_type_keys[labeling_mode]])
                if type(cl) == RandomForest:
                    logging.info('classifier oob error: %.2f' % cl.oob)
                g.merge_priority_function = \
                                        classifier_probability(feature_map, cl)
            elif priority_mode == 'random' or \
                (priority_mode == 'active' and numepochs == 0):
                g.merge_priority_function = random_priority
            elif priority_mode == 'custom':
                g.merge_priority_function = kwargs.get('mpf', boundary_mean)
            # a bug in MergeQueue usage causes a progress-bar crash,
            # so disable progress display here
            g.show_progress = False
            g.rebuild_merge_queue()
            alldata.append(
                g._learn_agglomerate(ctables, feature_map, ws_is_gt,
                                     learning_mode, labeling_mode))
            if memory:
                if unique:
                    data = unique_learning_data_elements(alldata)
                else:
                    data = concatenate_data_elements(alldata)
            else:
                data = alldata[-1]
            logging.debug('data size %d at epoch %d' %
                          (len(data[0]), numepochs))
        return data, alldata

    def learn_flat(self, gts, feature_map, ws_is_gt, *args, **kwargs):
        if type(gts) != list:
            gts = [gts]  # allow using single ground truth as input
        ctables = [
            contingency_table(self.get_segmentation(), gt) for gt in gts
        ]
        assignments = [(ct == ct.max(axis=1)[:, newaxis]) for ct in ctables]
        return map(
            array,
            zip(*[
                self.learn_edge(e, ctables, assignments, feature_map, ws_is_gt)
                for e in self.real_edges()
            ]))

    def learn_edge(self,
                   edge,
                   ctables,
                   assignments,
                   feature_map,
                   ws_is_gt,
                   boundary_overlap_thresh=0.3):
        n1, n2 = edge
        features = feature_map(self, n1, n2).ravel()
        # Calculate weights for weighting data points
        s1, s2 = [len(self.node[n]['extent']) for n in [n1, n2]]
        weights = \
            compute_local_vi_change(s1, s2, self.volume_size), \
            compute_local_rand_change(s1, s2, self.volume_size)
        # Get the fraction of times that n1 and n2 assigned to
        # same segment in the ground truths
        cont_labels = [
            [(-1)**(a[n1, :] == a[n2, :]).all() for a in assignments],
            [compute_true_delta_vi(ctable, n1, n2) for ctable in ctables],
            [
                -compute_true_delta_rand(ctable, n1, n2, self.volume_size)
                for ctable in ctables
            ],
            [(self.compute_boundary_overlap_with_gt(n1, n2, ws_is_gt) >
              boundary_overlap_thresh) * 2 - 1]
        ]
        labels = [sign(mean(cont_label)) for cont_label in cont_labels]
        if any(map(isnan, labels)) or any([label == 0 for label in labels]):
            logging.debug('NaN or 0 labels found. ' +
                          ' '.join(map(str, [labels, (n1, n2)])))
        labels = [1 if i == 0 or isnan(i) else i for i in labels]
        return features, labels, weights, (n1, n2)

    def compute_boundary_overlap_with_gt(self, n1, n2, ws_is_gt):
        val = ws_is_gt.ravel()[list(self[n1][n2]['boundary'])]
        return sum(val) / float(len(val))

    def _learn_agglomerate(self,
                           ctables,
                           feature_map,
                           gt_dts,
                           learning_mode='forbidden',
                           labeling_mode='assignment'):
        """Learn the agglomeration process using various strategies.

        Arguments:
            - one or more contingency tables between own segments and gold
            standard segmentations
            - a feature map function {Graph, node1, node2} |--> array([float])
            [- a learning mode]

        Value:
            A learning data matrix of shape 
            [n_training_examples x (n_features + 5)]. The elements after the
            features are the label, the approximate magnitude of the variation
            of information (VI) change, the approximate magnitude of the Rand
            index (RI) change, and the two nodes that were sampled.

        Learning modes:
            - strict: use positive-boundary examples to learn but never
            merge
            - loose: merge regardless of label
        Labeling modes:
            - assignment: assign each node to a gold standard node; the label
            is whether the two candidate nodes map to the same gold node.
            - vi-sign: compute the vi change resulting from merging candidate
            regions. Use the sign of the change as the training label.
            - rand-sign: compute the rand change resulting from merging the
            candidate regions. Use the sign of the change as the training
            label.
        """
        label_type_keys = {
            'assignment': 0,
            'vi-sign': 1,
            'rand-sign': 2,
            'boundary': 3
        }
        assignments = [(ct == ct.max(axis=1)[:, newaxis]) for ct in ctables]
        g = self
        data = []
        while len(g.merge_queue) > 0:
            merge_priority, valid, n1, n2 = g.merge_queue.pop()
            if valid:
                dat = g.learn_edge((n1, n2), ctables, assignments, feature_map,
                                   gt_dts)
                data.append(dat)
                label = dat[1][label_type_keys[labeling_mode]]
                if learning_mode != 'strict' or label < 0:
                    for ctable, assignment in zip(ctables, assignments):
                        ctable[n1] += ctable[n2]
                        ctable[n2] = 0
                        assignment[n1] = ctable[n1] == ctable[n1].max()
                        assignment[n2] = 0
                    g.merge_nodes(n1, n2)
        return map(array, zip(*data))

    def replay_merge_history(self, merge_seq, labels=None, num_errors=1):
        """Agglomerate according to a merge sequence, optionally labeled.
        
        The merge sequence and labels _must_ be generators if you don't want
        to manually keep track of how much has been consumed. The merging
        continues until num_errors false merges have been encountered, or 
        until the sequence is fully consumed.
        
        labels are -1 or 0 for 'should merge', 1 for 'should not merge'.
        
        Return value: number of elements consumed from merge_seq, and last
        merge pair observed.
        """
        if labels is None:
            labels1 = itertools.repeat(False)
            labels2 = itertools.repeat(False)
        else:
            labels1 = (label > 0 for label in labels)
            labels2 = (label > 0 for label in labels)
        counter = itertools.count()
        errors_remaining = conditional_countdown(labels2, num_errors)
        nodes = None
        for nodes, label, errs, count in \
                        izip(merge_seq, labels1, errors_remaining, counter):
            n1, n2 = nodes
            if not label:
                self.merge_nodes(n1, n2)
            elif errs == 0:
                break
        return count, nodes

    def update_ucm(self, n1, n2, score=-inf):
        """Update ultrametric contour map."""
        if self.ucm is not None:
            self.max_merge_score = max(self.max_merge_score, score)
            idxs = list(self[n1][n2]['boundary'])
            self.ucm_r[idxs] = self.max_merge_score

    def merge_nodes(self, n1, n2):
        """Merge two nodes, while updating the necessary edges."""
        self.node[n1]['extent'].update(self.node[n2]['extent'])
        self.feature_manager.update_node_cache(self, n1, n2,
                                               self.node[n1]['feature-cache'],
                                               self.node[n2]['feature-cache'])
        self.segmentation_r[list(self.node[n2]['extent'])] = n1
        new_neighbors = [
            n for n in self.neighbors(n2) if n not in [n1, self.boundary_body]
        ]
        for n in new_neighbors:
            self.merge_edge_properties((n2, n), (n1, n))
        # this if statement enables merging of non-adjacent nodes
        if self.has_edge(n1, n2) and self.has_zero_boundaries:
            self.refine_post_merge_boundaries(n1, n2)
        self.rig[n1] += self.rig[n2]
        self.rig[n2] = 0
        self.remove_node(n2)

    def refine_post_merge_boundaries(self, n1, n2):
        boundary = array(list(self[n1][n2]['boundary']))
        boundary_neighbor_pixels = self.segmentation_r[self.neighbor_idxs(
            boundary)]
        add = ((boundary_neighbor_pixels == 0) +
               (boundary_neighbor_pixels == n1) +
               (boundary_neighbor_pixels == n2)).all(axis=1)
        check = ~add
        self.node[n1]['extent'].update(boundary[add])
        boundary_probs = self.probabilities_r[boundary[add]]
        self.feature_manager.pixelwise_update_node_cache(
            self, n1, self.node[n1]['feature-cache'], boundary[add])
        self.segmentation_r[boundary[add]] = n1
        boundaries_to_edit = {}
        for px in boundary[check]:
            for lb in unique(self.segmentation_r[self.neighbor_idxs(px)]):
                if lb not in [0, n1, self.boundary_body]:
                    try:
                        boundaries_to_edit[(n1, lb)].append(px)
                    except KeyError:
                        boundaries_to_edit[(n1, lb)] = [px]
        for u, v in boundaries_to_edit.keys():
            idxs = set(boundaries_to_edit[(u, v)])
            if self.has_edge(u, v):
                idxs = idxs - self[u][v]['boundary']
                self[u][v]['boundary'].update(idxs)
                self.feature_manager.pixelwise_update_edge_cache(
                    self, u, v, self[u][v]['feature-cache'], list(idxs))
            else:
                self.add_edge(u, v, boundary=set(idxs))
                self[u][v]['feature-cache'] = \
                    self.feature_manager.create_edge_cache(self, u, v)
            self.update_merge_queue(u, v)
        for n in self.neighbors(n2):
            if (n1, n) not in boundaries_to_edit and n != n1:
                self.update_merge_queue(n1, n)

    def merge_subgraph(self, subgraph=None, source=None):
        if type(subgraph) not in [Rag, Graph]:  # input is node list
            subgraph = self.subgraph(subgraph)
        if len(subgraph) > 0:
            node_dfs = list(dfs_preorder_nodes(subgraph, source))
            # dfs_preorder_nodes returns iter, convert to list
            source_node, other_nodes = node_dfs[0], node_dfs[1:]
            for current_node in other_nodes:
                self.merge_nodes(source_node, current_node)

    def split_node(self, u, n=2, **kwargs):
        node_extent = list(self.node[u]['extent'])
        node_borders = set().union(
            *[self[u][v]['boundary'] for v in self.neighbors(u)])
        labels = unique(self.watershed_r[node_extent])
        if labels[0] == 0:
            labels = labels[1:]
        self.remove_node(u)
        self.build_graph_from_watershed(
            idxs=array(list(set().union(node_extent, node_borders))))
        self.ncut(num_clusters=n, nodes=labels, **kwargs)

    def merge_edge_properties(self, src, dst):
        """Merge the properties of edge src into edge dst."""
        u, v = dst
        w, x = src
        if not self.has_edge(u, v):
            self.add_edge(u, v, attr_dict=self[w][x])
        else:
            self[u][v]['boundary'].update(self[w][x]['boundary'])
            self.feature_manager.update_edge_cache(self, (u, v), (w, x),
                                                   self[u][v]['feature-cache'],
                                                   self[w][x]['feature-cache'])
        try:
            self.merge_queue.invalidate(self[w][x]['qlink'])
        except KeyError:
            pass
        self.update_merge_queue(u, v)

    def update_merge_queue(self, u, v):
        """Update the merge queue item for edge (u,v). Add new by default."""
        if self.boundary_body in [u, v]:
            return
        if 'qlink' in self[u][v]:
            self.merge_queue.invalidate(self[u][v]['qlink'])
        if not self.merge_queue.is_null_queue:
            w = self.merge_priority_function(self, u, v)
            new_qitem = [w, True, u, v]
            self[u][v]['qlink'] = new_qitem
            self[u][v]['weight'] = w
            self.merge_queue.push(new_qitem)

    def get_segmentation(self):
        return morpho.juicy_center(self.segmentation, self.pad_thickness)

    def get_ucm(self):
        if hasattr(self, 'ignored_boundary'):
            self.ucm[self.ignored_boundary] = self.max_merge_score
        return morpho.juicy_center(self.ucm, self.pad_thickness)

    def build_volume(self, nbunch=None):
        """Return the segmentation (numpy.ndarray) induced by the graph."""
        v = zeros_like(self.watershed)
        vr = v.ravel()
        if nbunch is None:
            nbunch = self.nodes()
        for n in nbunch:
            vr[list(self.node[n]['extent'])] = n
        return morpho.juicy_center(v, self.pad_thickness)

    def build_boundary_map(self, ebunch=None):
        if len(self.merge_queue) == 0:
            self.rebuild_merge_queue()
        m = zeros(self.watershed.shape, double)
        mr = m.ravel()
        if ebunch is None:
            ebunch = self.real_edges_iter()
        ebunch = sorted([(self[u][v]['weight'], u, v) for u, v in ebunch])
        for w, u, v in ebunch:
            b = list(self[u][v]['boundary'])
            mr[b] = w
        if hasattr(self, 'ignored_boundary'):
            m[self.ignored_boundary] = inf
        return morpho.juicy_center(m, self.pad_thickness)

    def remove_obvious_inclusions(self):
        """Merge any nodes with only one edge to their neighbors."""
        for n in self.nodes():
            if self.degree(n) == 1:
                self.merge_nodes(self.neighbors(n)[0], n)

    def remove_inclusions(self):
        """Merge any segments fully contained within other segments."""
        bcc = list(biconnected_components(self))
        container = [i for i, s in enumerate(bcc)
                     if self.boundary_body in s][0]
        del bcc[container]  # remove the main graph
        bcc = map(list, bcc)
        for cc in bcc:
            cc.sort(key=lambda x: len(self.node[x]['extent']), reverse=True)
        bcc.sort(key=lambda x: len(self.node[x[0]]['extent']))
        for cc in bcc:
            self.merge_subgraph(cc, cc[0])

    def orphans(self):
        """List of all the nodes that do not touch the volume boundary."""
        return [n for n in self.nodes() if not self.at_volume_boundary(n)]

    def is_traversed_by_node(self, n):
        """Determine whether a body traverses the volume.
        
        This is defined as touching the volume boundary at two distinct 
        locations.
        """
        if not self.at_volume_boundary(n) or n == self.boundary_body:
            return False
        v = zeros(self.segmentation.shape, uint8)
        v.ravel()[list(self[n][self.boundary_body]['boundary'])] = 1
        _, n = label(v, ones([3] * v.ndim))
        return n > 1

    def traversing_bodies(self):
        """List all bodies that traverse the volume."""
        return [n for n in self.nodes() if self.is_traversed_by_node(n)]

    def non_traversing_bodies(self):
        """List bodies that are not orphans and do not traverse the volume."""
        return [
            n for n in self.nodes()
            if self.at_volume_boundary(n) and not self.is_traversed_by_node(n)
        ]

    def raveler_body_annotations(self, traverse=False):
        """Return JSON-compatible dict formatted for Raveler annotations."""
        orphans = self.orphans()
        non_traversing_bodies = \
            self.non_traversing_bodies() if traverse else []
        data = \
            [{'status': 'not sure', 'comment': 'orphan', 'body ID': int(o)}
             for o in orphans] + \
            [{'status': 'not sure', 'comment': 'does not traverse',
              'body ID': int(n)} for n in non_traversing_bodies]
        metadata = {'description': 'body annotations', 'file version': 2}
        return {'data': data, 'metadata': metadata}

    def at_volume_boundary(self, n):
        """Return True if node n touches the volume boundary."""
        return self.has_edge(n, self.boundary_body) or n == self.boundary_body

    def should_merge(self, n1, n2):
        return self.rig[n1].argmax() == self.rig[n2].argmax()

    def get_pixel_label(self, n1, n2):
        boundary = array(list(self[n1][n2]['boundary']))
        min_idx = boundary[self.probabilities_r[boundary, 0].argmin()]
        if self.should_merge(n1, n2):
            return min_idx, 2
        else:
            return min_idx, 1

    def pixel_labels_array(self, false_splits_only=False):
        ar = zeros_like(self.watershed_r)
        labels = [self.get_pixel_label(*e) for e in self.real_edges()]
        if false_splits_only:
            labels = [l for l in labels if l[1] == 2]
        ids, ls = map(array, zip(*labels))
        ar[ids] = ls.astype(ar.dtype)
        return ar.reshape(self.watershed.shape)

    def split_vi(self, gt=None):
        if self.gt is None and gt is None:
            return array([0, 0])
        elif self.gt is not None:
            return split_vi(None, None, self.rig)
        else:
            return split_vi(self.get_segmentation(), gt, None, [0], [0])

    def boundary_indices(self, n1, n2):
        return list(self[n1][n2]['boundary'])

    def get_edge_coordinates(self, n1, n2, arbitrary=False):
        """Find where in the segmentation the edge (n1, n2) is most visible."""
        return get_edge_coordinates(self, n1, n2, arbitrary)

    def write(self, fout, output_format='GraphML'):
        if output_format == 'Plaza JSON':
            self.write_plaza_json(fout)
        else:
            raise ValueError('Unsupported output format for agglo.Rag: %s' %
                             output_format)

    def write_plaza_json(self, fout):
        """Write graph to Steve Plaza's JSON spec."""
        edge_list = [{
            'location': map(int, self.get_edge_coordinates(i, j)[-1::-1]),
            'node1': int(i),
            'node2': int(j),
            'edge_size': len(self[i][j]['boundary']),
            'size1': len(self.node[i]['extent']),
            'size2': len(self.node[j]['extent']),
            'weight': float(self[i][j]['weight'])
        } for i, j in self.real_edges()]
        with open(fout, 'w') as f:
            json.dump({'edge_list': edge_list}, f, indent=4)

    def ncut(self,
             num_clusters=10,
             kmeans_iters=5,
             sigma=255.0 * 20,
             nodes=None,
             **kwargs):
        """Run normalized cuts on the current set of superpixels.
           Keyword arguments:
               num_clusters -- number of clusters to compute
               kmeans_iters -- # iterations to run kmeans when clustering
               sigma -- sigma value when setting up weight matrix
           Return value: None
        """
        if nodes is None:
            nodes = self.nodes()
        # Compute weight matrix
        W = self.compute_W(self.merge_priority_function, nodes=nodes)
        # Run normalized cut
        labels, eigvec, eigval = ncutW(W, num_clusters, kmeans_iters, **kwargs)
        # Merge nodes that are in same cluster
        self.cluster_by_labels(labels, nodes)

    def cluster_by_labels(self, labels, nodes=None):
        """Merge all superpixels with the same label (1 label per 1 sp)"""
        if nodes is None:
            nodes = array(self.nodes())
        if len(labels) != len(nodes):
            raise ValueError('Number of labels should be %d but is %d.' %
                             (len(nodes), len(labels)))
        for l in unique(labels):
            inds = nonzero(labels == l)[0]
            nodes_to_merge = nodes[inds]
            node1 = nodes_to_merge[0]
            for node in nodes_to_merge[1:]:
                self.merge_nodes(node1, node)

    def compute_W(self, merge_priority_function, sigma=255.0 * 20, nodes=None):
        """ Computes the weight matrix for clustering"""
        if nodes is None:
            nodes = array(self.nodes())
        n = len(nodes)
        nodes2ind = dict(zip(nodes, range(n)))
        W = lil_matrix((n, n))
        for u, v in self.real_edges(nodes):
            try:
                i, j = nodes2ind[u], nodes2ind[v]
            except KeyError:
                continue
            w = merge_priority_function(self, u, v)
            W[i, j] = W[j, i] = exp(-w**2 / sigma)
        return W
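Putting the class together, a typical run builds the RAG from a watershed labelling and a boundary-probability map, agglomerates up to a priority threshold, and reads the result back out. A minimal usage sketch using only methods defined above (toy arrays; assumes the default boundary_mean priority function):

import numpy as np

# two basins separated by a 0-labelled dam with high boundary probability
ws = np.array([[1, 1, 0, 2, 2],
               [1, 1, 0, 2, 2]])
probs = np.array([[0.1, 0.1, 0.9, 0.1, 0.1],
                  [0.1, 0.1, 0.9, 0.1, 0.1]])

g = Rag(watershed=ws, probabilities=probs)
g.agglomerate(0.95)          # merge while edge priority < 0.95
seg = g.get_segmentation()   # basins 1 and 2 now share a label
ucm = g.get_ucm()            # ultrametric contour map of merge scores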
Example #18
def test_askpr_closed():
    repo = FakeGHRepo(injected_prs=[FakeGHPullRequest(14, state=CLOSED)])
    mq = MergeQueue(repo)
    with pytest.raises(MergeQueueException):
        mq.ask_pr(14)
    assert len(mq.queue) == 0
Example #19
class Rag(Graph):
    """Region adjacency graph for segmentation of nD volumes."""

    def __init__(self, watershed=array([]), probabilities=array([]), 
            merge_priority_function=None, allow_shared_boundaries=True,
            gt_vol=None, feature_manager=MomentsFeatureManager(), 
            show_progress=False, lowmem=False, connectivity=1,
            channel_is_oriented=None, orientation_map=array([]),
            normalize_probabilities=False):
        """Create a graph from a watershed volume and image volume.
        
        The watershed is assumed to have dams of label 0 in between basins.
        Then, each basin corresponds to a node in the graph and an edge is
        placed between two nodes if there are one or more watershed pixels
        connected to both corresponding basins.
        """
        super(Rag, self).__init__(weighted=False)
        self.show_progress = show_progress
        if merge_priority_function is None:
            self.merge_priority_function = boundary_mean
        else:
            self.merge_priority_function = merge_priority_function
        self.set_watershed(watershed, lowmem, connectivity)
        self.set_probabilities(probabilities, normalize_probabilities)
        self.set_orientations(orientation_map, channel_is_oriented)
        if watershed is None:
            self.ucm = None
        else:
            self.ucm = array(self.watershed==0, dtype=float)
            self.ucm[self.ucm==0] = -inf
            self.ucm_r = self.ucm.ravel()
        self.max_merge_score = -inf
        self.build_graph_from_watershed(allow_shared_boundaries)
        self.set_feature_manager(feature_manager)
        self.set_ground_truth(gt_vol)
        self.merge_queue = MergeQueue()

    def __copy__(self):
        if sys.version_info[:2] < (2,7):
            # Python versions prior to 2.7 don't handle deepcopy of function
            # objects well. Thus, keep a reference and remove from Rag object
            f = self.neighbor_idxs; del self.neighbor_idxs
            F = self.feature_manager; del self.feature_manager
        pr_shape = self.probabilities_r.shape
        g = super(Rag, self).copy()
        g.watershed_r = g.watershed.ravel()
        g.segmentation_r = g.segmentation.ravel()
        g.ucm_r = g.ucm.ravel()
        g.probabilities_r = g.probabilities.reshape(pr_shape)
        if sys.version_info[:2] < (2,7):
            g.neighbor_idxs = f
            self.neighbor_idxs = f
            g.feature_manager = F
            self.feature_manager = F
        return g

    def copy(self):
        return self.__copy__()

    def real_edges(self, *args, **kwargs):
        return [e for e in super(Rag, self).edges(*args, **kwargs) if
                                            self.boundary_body not in e[:2]]

    def real_edges_iter(self, *args, **kwargs):
        return (e for e in super(Rag, self).edges_iter(*args, **kwargs) if
                                            self.boundary_body not in e[:2])

    def build_graph_from_watershed(self, 
                                    allow_shared_boundaries=True, idxs=None):
        if self.watershed.size == 0: return # stop processing for empty graphs
        if not allow_shared_boundaries:
            self.ignored_boundary = zeros(self.watershed.shape, bool)
        if idxs is None:
            idxs = arange(self.watershed.size)
            self.add_node(self.boundary_body, 
                    extent=set(flatnonzero(self.watershed==self.boundary_body)))
        inner_idxs = idxs[self.watershed_r[idxs] != self.boundary_body]
        pbar = ip.StandardProgressBar() if self.show_progress \
                                        else ip.NoProgressBar()
        for idx in ip.with_progress(inner_idxs, title='Graph... ', pbar=pbar):
            ns = self.neighbor_idxs(idx)
            adj_labels = self.watershed_r[ns]
            adj_labels = unique(adj_labels)
            adj_labels = adj_labels[adj_labels.nonzero()]
            nodeid = self.watershed_r[idx]
            if nodeid != 0:
                adj_labels = adj_labels[adj_labels != nodeid]
                edges = zip(repeat(nodeid), adj_labels)
                if not self.has_node(nodeid):
                    self.add_node(nodeid, extent=set())
                try:
                    self.node[nodeid]['extent'].add(idx)
                except KeyError:
                    self.node[nodeid]['extent'] = set([idx])
            else:
                if len(adj_labels) == 0: continue
                if adj_labels[-1] != self.boundary_body:
                    edges = list(combinations(adj_labels, 2))
                else:
                    edges = list(product([self.boundary_body], adj_labels[:-1]))
            if allow_shared_boundaries or len(edges) == 1:
                for l1,l2 in edges:
                    if self.has_edge(l1, l2): 
                        self[l1][l2]['boundary'].add(idx)
                    else: 
                        self.add_edge(l1, l2, boundary=set([idx]))
            elif len(edges) > 1:
                self.ignored_boundary.ravel()[idx] = True

    def set_feature_manager(self, feature_manager):
        self.feature_manager = feature_manager
        if len(self.feature_manager) > 0:
            self.compute_feature_caches()

    def compute_feature_caches(self):
        for n in self.nodes_iter():
            self.node[n]['feature-cache'] = \
                            self.feature_manager.create_node_cache(self, n)
        for n1, n2 in self.edges_iter():
            self[n1][n2]['feature-cache'] = \
                            self.feature_manager.create_edge_cache(self, n1, n2)

    def get_neighbor_idxs_fast(self, idxs):
        return self.pixel_neighbors[idxs]

    def get_neighbor_idxs_lean(self, idxs, connectivity=1):
        return morpho.get_neighbor_idxs(self.watershed, idxs, connectivity)

    def set_probabilities(self, probs=array([]), normalize=False):
        if len(probs) == 0:
            self.probabilities = zeros_like(self.watershed)
            self.probabilities_r = self.probabilities.ravel()
            return
        probs = probs.astype(double)
        if normalize and len(probs) > 1:
            probs -= probs.min() # ensure probs.min() == 0
            probs /= probs.max() # ensure probs.max() == 1
        sp = probs.shape
        sw = tuple(array(self.watershed.shape, dtype=int)-\
                    2*self.pad_thickness*ones(self.watershed.ndim, dtype=int))
        p_ndim = probs.ndim
        w_ndim = self.watershed.ndim
        padding = [inf]+(self.pad_thickness-1)*[0]
        if p_ndim == w_ndim:
            self.probabilities = morpho.pad(probs, padding)
            self.probabilities_r = self.probabilities.ravel()[:,newaxis]
        elif p_ndim == w_ndim+1:
            if sp[1:] == sw:
                # channels are leading; move the channel axis to the end
                probs = probs.transpose(list(range(1, p_ndim)) + [0])
            axes = range(p_ndim-1)
            self.probabilities = morpho.pad(probs, padding, axes)
            self.probabilities_r = self.probabilities.reshape(
                                                (self.watershed.size, -1))

    def set_orientations(self, orientation_map, channel_is_oriented):
        padding = [0] * self.pad_thickness
        if len(orientation_map) == 0:
            # default: trivial orientation map covering the whole volume
            self.orientation_map = zeros_like(self.watershed)
        else:
            self.orientation_map = \
                morpho.pad(orientation_map, padding).astype(int)
        self.orientation_map_r = self.orientation_map.ravel()
        if channel_is_oriented is None:
            nchannels = 1 if self.probabilities.ndim==self.watershed.ndim \
                else self.probabilities.shape[-1]
            self.channel_is_oriented = array([False]*nchannels)
            self.max_probabilities_r = zeros_like(self.probabilities_r)
            self.oriented_probabilities_r = zeros_like(self.probabilities_r)
            self.non_oriented_probabilities_r = self.probabilities_r
        else:
            self.channel_is_oriented = channel_is_oriented
            self.max_probabilities_r = \
                self.probabilities_r[:, self.channel_is_oriented].max(axis=1)
            self.oriented_probabilities_r = \
                self.probabilities_r[:, self.channel_is_oriented]
            self.oriented_probabilities_r = \
                self.oriented_probabilities_r[
                    range(len(self.oriented_probabilities_r)), 
                    self.orientation_map_r]
            self.non_oriented_probabilities_r = \
                self.probabilities_r[:, ~self.channel_is_oriented]


    def set_watershed(self, ws=array([]), lowmem=False, connectivity=1):
        try:
            self.boundary_body = ws.max()+1
        except ValueError: # empty watershed given
            self.boundary_body = -1
        self.volume_size = ws.size
        self.has_zero_boundaries = (ws==0).any()
        if self.has_zero_boundaries:
            self.watershed = morpho.pad(ws, [0, self.boundary_body])
        else:
            self.watershed = morpho.pad(ws, self.boundary_body)
        self.segmentation = self.watershed.copy()
        self.watershed_r = self.watershed.ravel()
        self.segmentation_r = self.segmentation.ravel() # reduce fct calls
        self.pad_thickness = 2 if (self.segmentation==0).any() else 1
        if lowmem:
            def neighbor_idxs(x): 
                return self.get_neighbor_idxs_lean(x, connectivity)
            self.neighbor_idxs = neighbor_idxs
        else:
            self.pixel_neighbors = \
                morpho.build_neighbors_array(self.watershed, connectivity)
            self.neighbor_idxs = self.get_neighbor_idxs_fast

    def set_ground_truth(self, gt=None):
        if gt is not None:
            gtm = gt.max()+1
            gt_ignore = [0, gtm] if (gt==0).any() else [gtm]
            seg_ignore = [0, self.boundary_body] if \
                        (self.segmentation==0).any() else [self.boundary_body]
            self.gt = morpho.pad(gt, gt_ignore)
            self.rig = contingency_table(self.segmentation, self.gt)
            self.rig[:, gt_ignore] = 0
            self.rig[seg_ignore, :] = 0
        else:
            self.gt = None
            # null pattern to transparently allow merging of nodes.
            # Bonus feature: counts how many sp's went into a single node.
            try:
                self.rig = ones(self.watershed.max()+1)
            except ValueError:
                self.rig = ones(self.number_of_nodes()+1)

    def build_merge_queue(self):
        """Build a queue of node pairs to be merged in a specific priority.
        
        The queue elements have a specific format in order to allow 'removing'
        specific elements from inside the priority queue. Each element is a
        list of length 4 containing:
            - the merge priority (any ordered type)
            - a 'valid' flag
            - the two nodes, in arbitrary order
        The valid flag allows one to "remove" elements by setting the flag to
        False. Then one checks the flag when popping elements and ignores those
        marked as invalid.

        One other specific feature is that there are back-links from edges to
        their corresponding queue items so that when nodes are merged,
        affected edges can be invalidated and reinserted in the queue.
        """
        queue_items = []
        for l1, l2 in self.real_edges_iter():
            w = self.merge_priority_function(self,l1,l2)
            qitem = [w, True, l1, l2]
            queue_items.append(qitem)
            self[l1][l2]['qlink'] = qitem
            self[l1][l2]['weight'] = w
        return MergeQueue(queue_items, with_progress=self.show_progress)
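
    # Illustrative sketch of the queue-item protocol described in the
    # docstring above (values made up):
    #
    #     qitem = [0.3, True, 4, 7]   # [priority, valid flag, node, node]
    #     qitem[1] = False            # "remove" it by invalidating in place
    #     self[4][7]['qlink']         # back-link from the edge to its qitem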

    def rebuild_merge_queue(self):
        """Build a merge queue from scratch and assign to self.merge_queue."""
        self.merge_queue = self.build_merge_queue()

    def agglomerate(self, threshold=0.5, save_history=False):
        """Merge nodes sequentially until given edge confidence threshold."""
        if self.merge_queue.is_empty():
            self.merge_queue = self.build_merge_queue()
        history, scores, evaluation = [], [], []
        while len(self.merge_queue) > 0 and \
                                        self.merge_queue.peek()[0] < threshold:
            merge_priority, valid, n1, n2 = self.merge_queue.pop()
            if valid:
                self.update_ucm(n1,n2,merge_priority)
                self.merge_nodes(n1,n2)
                if save_history: 
                    history.append((n1,n2))
                    scores.append(merge_priority)
                    evaluation.append(
                        (self.number_of_nodes()-1, self.split_vi())
                    )
        if save_history:
            return history, scores, evaluation
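
    # Hedged usage sketch: merge every edge whose priority falls below 0.75
    # and keep the merge history (illustrative call):
    #
    #     history, scores, evaluation = g.agglomerate(0.75, save_history=True)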

    def agglomerate_count(self, stepsize=100, save_history=False):
        """Agglomerate until 'stepsize' merges have been made."""
        if self.merge_queue.is_empty():
            self.merge_queue = self.build_merge_queue()
        history, evaluation = [], []
        i = 0
        while len(self.merge_queue) > 0 and i < stepsize:
            merge_priority, valid, n1, n2 = self.merge_queue.pop()
            if valid:
                i += 1
                self.update_ucm(n1,n2,merge_priority)
                self.merge_nodes(n1,n2)
                if save_history: 
                    history.append((n1,n2))
                    evaluation.append(
                        (self.number_of_nodes()-1, self.split_vi())
                    )
        if save_history:
            return history, evaluation
        
    def agglomerate_ladder(self, threshold=1000, strictness=1):
        """Merge sequentially all nodes smaller than threshold.
        
        strictness = 1 only considers size of nodes
        strictness = 2 adds additional constraint: small nodes can only be 
        merged to large neighbors
        strictness = 3 additionally requires that the boundary between nodes
        be larger than 2 pixels
        Note: nodes that are on the volume boundary are not agglomerated.
        """
        original_merge_priority_function = self.merge_priority_function
        self.merge_priority_function = make_ladder(
            self.merge_priority_function, threshold, strictness
        )
        self.rebuild_merge_queue()
        self.agglomerate(inf)
        self.merge_priority_function = original_merge_priority_function
        self.merge_queue.finish()
        self.rebuild_merge_queue()
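
    # Hedged usage sketch: remove every node smaller than 1000 voxels,
    # allowing merges into large neighbors only (illustrative call):
    #
    #     g.agglomerate_ladder(1000, strictness=2)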
        
    def one_shot_agglomeration(self, threshold=0.5):
        g = self.copy()
        if len(g.merge_queue) == 0:
            g.rebuild_merge_queue()
        for u, v, d in g.edges(data=True):
            if g.boundary_body in [u,v] or d['weight'] > threshold:
                g.remove_edge(u, v)
        ccs = connected_components(g)
        for cc in ccs:
            g.merge_subgraph(cc)
        return g.get_segmentation()

    def assign_gt_to_ws(self, gt):
        ws_nopad = morpho.juicy_center(self.watershed, self.pad_thickness)
        bdrymap = morpho.pad(morpho.seg_to_bdry(ws_nopad), 
                                                    [0]*self.pad_thickness)
        gt_bdrymap_nopad = morpho.seg_to_bdry(gt)
        gt_bdrymap = morpho.pad(gt_bdrymap_nopad, [0]*self.pad_thickness)
        k = distance_transform_cdt(1-bdrymap, return_indices=True)
        ind = nonzero(gt_bdrymap.ravel())[0]
        closest_sub = numpy.concatenate(
                                [i.ravel()[:,newaxis] for i in k[1]],axis=1)
        closest_sub = closest_sub[ind,:]
        closest_ind = [dot(bdrymap.strides, i) // bdrymap.itemsize
                                                        for i in closest_sub]
        M = zeros_like(bdrymap).astype(float)
        M.ravel()[closest_ind] = 1.0
        bdrymap.ravel()[closest_ind] = False
        k = distance_transform_cdt(1-bdrymap, return_indices=True)
        ind = nonzero(gt_bdrymap.ravel())[0]
        closest_sub = numpy.concatenate(
                                [i.ravel()[:,newaxis] for i in k[1]],axis=1)
        closest_sub = closest_sub[ind,:]
        closest_ind = [dot(bdrymap.strides, i) // bdrymap.itemsize
                                                        for i in closest_sub]
        M.ravel()[closest_ind] = 1.0
        return M
        
    def learn_agglomerate(self, gts, feature_map, min_num_samples=1,
                                                            *args, **kwargs):
        """Agglomerate while comparing to ground truth & classifying merges."""
        learn_flat = kwargs.get('learn_flat', True)
        learning_mode = kwargs.get('learning_mode', 'strict').lower()
        labeling_mode = kwargs.get('labeling_mode', 'assignment').lower()
        priority_mode = kwargs.get('priority_mode', 'random').lower()
        memory = kwargs.get('memory', True)
        unique = kwargs.get('unique', True)
        max_numepochs = kwargs.get('max_numepochs', 10)
        if priority_mode == 'mean' and unique: 
            max_numepochs = 2 if learn_flat else 1
        if priority_mode in ['random', 'mean'] and not memory:
            max_numepochs = 1
        label_type_keys = {'assignment':0, 'vi-sign':1, 
                                                'rand-sign':2, 'boundary':3}
        if type(gts) != list:
            gts = [gts] # allow using single ground truth as input
        master_ctables = \
                [contingency_table(self.get_segmentation(), gt) for gt in gts]
        # Match the watershed to the ground truths
        ws_is_gt = zeros_like(self.watershed).astype(float)
        for gt in gts:
            ws_is_gt += self.assign_gt_to_ws(gt)
        ws_is_gt /= float(len(gts))
        ws_is_gt = ws_is_gt>0.5
        alldata = []
        data = [[],[],[],[]]
        for numepochs in range(max_numepochs):
            ctables = deepcopy(master_ctables)
            if len(data[0]) > min_num_samples:
                break
            if learn_flat and numepochs == 0:
                alldata.append(self.learn_flat(gts, feature_map, ws_is_gt))
                data = unique_learning_data_elements(alldata) if unique else \
                       alldata[-1]
                continue
            g = self.copy()
            if priority_mode == 'mean':
                g.merge_priority_function = boundary_mean
            elif (numepochs > 0 and priority_mode == 'active') or \
                    (numepochs % 2 == 1 and priority_mode == 'mixed'):
                cl = kwargs.get('classifier', RandomForest())
                cl = cl.fit(data[0], data[1][:,label_type_keys[labeling_mode]])
                if type(cl) == RandomForest:
                    logging.info('classifier oob error: %.2f'%cl.oob)
                g.merge_priority_function = \
                                        classifier_probability(feature_map, cl)
            elif priority_mode == 'random' or \
                (priority_mode == 'active' and numepochs == 0):
                g.merge_priority_function = random_priority
            elif priority_mode == 'custom':
                g.merge_priority_function = kwargs.get('mpf', boundary_mean)
            g.show_progress = False # bug in MergeQueue usage causes
                                    # progressbar crash.
            g.rebuild_merge_queue()
            alldata.append(g._learn_agglomerate(ctables, feature_map, ws_is_gt,
                                                learning_mode, labeling_mode))
            if memory:
                if unique:
                    data = unique_learning_data_elements(alldata) 
                else:
                    data = concatenate_data_elements(alldata)
            else:
                data = alldata[-1]
            logging.debug('data size %d at epoch %d'%(len(data[0]), numepochs))
        return data, alldata
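
    # Hedged usage sketch (illustrative; assumes a ground truth volume `gt`
    # and a feature map compatible with this module's feature managers):
    #
    #     data, alldata = g.learn_agglomerate(gt, feature_map)
    #     features, labels, weights, edges = data
    #     cl = RandomForest().fit(features, labels[:, 0])  # 0 == 'assignment'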

    def learn_flat(self, gts, feature_map, ws_is_gt, *args, **kwargs):
        if type(gts) != list:
            gts = [gts] # allow using single ground truth as input
        ctables = [contingency_table(self.get_segmentation(), gt) for gt in gts]
        assignments = [(ct == ct.max(axis=1)[:,newaxis]) for ct in ctables]
        return map(array, zip(*[
                self.learn_edge(e, ctables, assignments, feature_map, ws_is_gt)
                for e in self.real_edges()]))

    def learn_edge(self, edge, ctables, assignments, feature_map, ws_is_gt,
            boundary_overlap_thresh=0.3):
        n1, n2 = edge
        features = feature_map(self, n1, n2).ravel()
        # Calculate weights for weighting data points
        s1, s2 = [len(self.node[n]['extent']) for n in [n1, n2]]
        weights = \
            compute_local_vi_change(s1, s2, self.volume_size), \
            compute_local_rand_change(s1, s2, self.volume_size)
        # Get the fraction of times that n1 and n2 were assigned to the
        # same segment in the ground truths
        cont_labels = [
            [(-1)**(a[n1,:]==a[n2,:]).all() for a in assignments],
            [compute_true_delta_vi(ctable, n1, n2) for ctable in ctables],
            [-compute_true_delta_rand(ctable, n1, n2, self.volume_size) 
                                                    for ctable in ctables],
            [(self.compute_boundary_overlap_with_gt(n1,n2, ws_is_gt)>
                                            boundary_overlap_thresh)*2 - 1] 
        ]
        labels = [sign(mean(cont_label)) for cont_label in cont_labels]
        if any(map(isnan, labels)) or any(l == 0 for l in labels):
            logging.debug('NaN or 0 labels found. ' + 
                                    ' '.join(map(str, [labels, (n1, n2)])))
        labels = [1 if i==0 or isnan(i) else i for i in labels]
        return features, labels, weights, (n1,n2)

    def compute_boundary_overlap_with_gt(self, n1, n2, ws_is_gt):
        val = ws_is_gt.ravel()[list(self[n1][n2]['boundary'])]
        return sum(val)/float(len(val))
    
    def _learn_agglomerate(self, ctables, feature_map, gt_dts, 
                        learning_mode='forbidden', labeling_mode='assignment'):
        """Learn the agglomeration process using various strategies.

        Arguments:
            - one or more contingency tables between own segments and gold
            standard segmentations
            - a feature map function {Graph, node1, node2} |--> array([float])
            [- a learning mode]

        Value:
            A learning data matrix of shape 
            [n_training_examples x (n_features + 5)]. The elements after the
            features are the label, the approximate magnitude of the variation
            of information (VI) change, the approximate magnitude of the Rand
            index (RI) change, and the two nodes that were sampled.

        Learning modes:
            - strict: use positive-boundary examples to learn, but never
            merge them.
            - loose: merge regardless of label.
        Labeling modes:
            - assignment: assign each node to a gold standard node and use
            whether the two nodes map to the same gold standard node as the
            training label.
            - vi-sign: compute the VI change resulting from merging the
            candidate regions. Use the sign of the change as the training
            label.
            - rand-sign: compute the Rand change resulting from merging the
            candidate regions. Use the sign of the change as the training
            label.
            - boundary: use the overlap between the candidate boundary and
            the ground truth boundary, thresholded, as the training label.
        """
        label_type_keys = {'assignment':0, 'vi-sign':1, 'rand-sign':2, 
                                                                'boundary':3}
        assignments = [(ct == ct.max(axis=1)[:,newaxis]) for ct in ctables]
        g = self
        data = []
        while len(g.merge_queue) > 0:
            merge_priority, valid, n1, n2 = g.merge_queue.pop()
            if valid:
                dat = g.learn_edge(
                            (n1,n2), ctables, assignments, feature_map, gt_dts)
                data.append(dat)
                label = dat[1][label_type_keys[labeling_mode]]
                if learning_mode != 'strict' or label < 0:
                    for ctable, assignment in zip(ctables, assignments):
                        ctable[n1] += ctable[n2]
                        ctable[n2] = 0
                        assignment[n1] = ctable[n1] == ctable[n1].max()
                        assignment[n2] = 0
                    g.merge_nodes(n1, n2)
        return map(array, zip(*data))
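
    # Note on the return value (hedged reading of the code above): each `dat`
    # is (features, labels, weights, (n1, n2)), so `map(array, zip(*data))`
    # yields four parallel arrays rather than one pre-concatenated matrix:
    #
    #     features, labels, weights, edges = g._learn_agglomerate(...)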

    def replay_merge_history(self, merge_seq, labels=None, num_errors=1):
        """Agglomerate according to a merge sequence, optionally labeled.
        
        The merge sequence and labels _must_ be generators if you don't want
        to manually keep track of how much has been consumed. The merging
        continues until num_errors false merges have been encountered, or 
        until the sequence is fully consumed.
        
        labels are -1 or 0 for 'should merge', 1 for 'should not merge'.
        
        Return value: number of elements consumed from merge_seq, and last
        merge pair observed.
        """
        if labels is None:
            labels1 = itertools.repeat(False)
            labels2 = itertools.repeat(False)
        else:
            labels1 = (label > 0 for label in labels)
            labels2 = (label > 0 for label in labels)
        counter = itertools.count(1)  # counts elements consumed, 1-based
        errors_remaining = conditional_countdown(labels2, num_errors)
        count, nodes = 0, None  # defaults in case merge_seq is empty
        for nodes, label, errs, count in \
                        izip(merge_seq, labels1, errors_remaining, counter):
            n1, n2 = nodes
            if not label:
                self.merge_nodes(n1, n2)
            elif errs == 0:
                break
        return count, nodes
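
    # Hedged replay sketch (illustrative; `history` and `labels` as produced
    # by a learning run, wrapped in generators as the docstring requires):
    #
    #     consumed, last_pair = g.replay_merge_history(iter(history),
    #                                                  labels=iter(labels))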

    def update_ucm(self, n1, n2, score=-inf):
        """Update ultrametric contour map."""
        if self.ucm is not None:
            self.max_merge_score = max(self.max_merge_score, score)
            idxs = list(self[n1][n2]['boundary'])
            self.ucm_r[idxs] = self.max_merge_score

    def merge_nodes(self, n1, n2):
        """Merge two nodes, while updating the necessary edges."""
        self.node[n1]['extent'].update(self.node[n2]['extent'])
        self.feature_manager.update_node_cache(self, n1, n2,
                self.node[n1]['feature-cache'], self.node[n2]['feature-cache'])
        self.segmentation_r[list(self.node[n2]['extent'])] = n1
        new_neighbors = [n for n in self.neighbors(n2)
                                        if n not in [n1, self.boundary_body]]
        for n in new_neighbors:
            self.merge_edge_properties((n2,n), (n1,n))
        # this if statement enables merging of non-adjacent nodes
        if self.has_edge(n1,n2) and self.has_zero_boundaries:
            self.refine_post_merge_boundaries(n1, n2)
        self.rig[n1] += self.rig[n2]
        self.rig[n2] = 0
        self.remove_node(n2)

    def refine_post_merge_boundaries(self, n1, n2):
        boundary = array(list(self[n1][n2]['boundary']))
        boundary_neighbor_pixels = self.segmentation_r[
            self.neighbor_idxs(boundary)
        ]
        add = ((boundary_neighbor_pixels == 0) +
               (boundary_neighbor_pixels == n1) +
               (boundary_neighbor_pixels == n2)).all(axis=1)
        check = ~add
        self.node[n1]['extent'].update(boundary[add])
        self.feature_manager.pixelwise_update_node_cache(self, n1,
                        self.node[n1]['feature-cache'], boundary[add])
        self.segmentation_r[boundary[add]] = n1
        boundaries_to_edit = {}
        for px in boundary[check]:
            for lb in unique(
                        self.segmentation_r[self.neighbor_idxs(px)]):
                if lb not in [0, n1, self.boundary_body]:
                    try:
                        boundaries_to_edit[(n1,lb)].append(px)
                    except KeyError:
                        boundaries_to_edit[(n1,lb)] = [px]
        for u, v in boundaries_to_edit.keys():
            idxs = set(boundaries_to_edit[(u,v)])
            if self.has_edge(u, v):
                idxs = idxs - self[u][v]['boundary']
                self[u][v]['boundary'].update(idxs)
                self.feature_manager.pixelwise_update_edge_cache(self, u, v,
                                    self[u][v]['feature-cache'], list(idxs))
            else:
                self.add_edge(u, v, boundary=set(idxs))
                self[u][v]['feature-cache'] = \
                    self.feature_manager.create_edge_cache(self, u, v)
            self.update_merge_queue(u, v)
        for n in self.neighbors(n2):
            if (n1, n) not in boundaries_to_edit and n != n1:
                self.update_merge_queue(n1, n)

    def merge_subgraph(self, subgraph=None, source=None):
        if type(subgraph) not in [Rag, Graph]: # input is node list
            subgraph = self.subgraph(subgraph)
        if len(subgraph) > 0:
            node_dfs = list(dfs_preorder_nodes(subgraph, source)) 
            # dfs_preorder_nodes returns iter, convert to list
            source_node, other_nodes = node_dfs[0], node_dfs[1:]
            for current_node in other_nodes:
                self.merge_nodes(source_node, current_node)

    def split_node(self, u, n=2, **kwargs):
        node_extent = list(self.node[u]['extent'])
        node_borders = set().union(
                        *[self[u][v]['boundary'] for v in self.neighbors(u)])
        labels = unique(self.watershed_r[node_extent])
        if labels[0] == 0:
            labels = labels[1:]
        self.remove_node(u)
        self.build_graph_from_watershed(
            idxs=array(list(set().union(node_extent, node_borders)))
        )
        self.ncut(num_clusters=n, nodes=labels, **kwargs)

    def merge_edge_properties(self, src, dst):
        """Merge the properties of edge src into edge dst."""
        u, v = dst
        w, x = src
        if not self.has_edge(u,v):
            self.add_edge(u, v, attr_dict=self[w][x])
        else:
            self[u][v]['boundary'].update(self[w][x]['boundary'])
            self.feature_manager.update_edge_cache(self, (u, v), (w, x),
                    self[u][v]['feature-cache'], self[w][x]['feature-cache'])
        try:
            self.merge_queue.invalidate(self[w][x]['qlink'])
        except KeyError:
            pass
        self.update_merge_queue(u, v)

    def update_merge_queue(self, u, v):
        """Update the merge queue item for edge (u,v). Add new by default."""
        if self.boundary_body in [u, v]:
            return
        if 'qlink' in self[u][v]:
            self.merge_queue.invalidate(self[u][v]['qlink'])
        if not self.merge_queue.is_null_queue:
            w = self.merge_priority_function(self,u,v)
            new_qitem = [w, True, u, v]
            self[u][v]['qlink'] = new_qitem
            self[u][v]['weight'] = w
            self.merge_queue.push(new_qitem)

    def get_segmentation(self):
        return morpho.juicy_center(self.segmentation, self.pad_thickness)

    def get_ucm(self):
        if hasattr(self, 'ignored_boundary'):
            self.ucm[self.ignored_boundary] = self.max_merge_score
        return morpho.juicy_center(self.ucm, self.pad_thickness)    

    def build_volume(self, nbunch=None):
        """Return the segmentation (numpy.ndarray) induced by the graph."""
        v = zeros_like(self.watershed)
        vr = v.ravel()
        if nbunch is None:
            nbunch = self.nodes()
        for n in nbunch:
            vr[list(self.node[n]['extent'])] = n
        return morpho.juicy_center(v,self.pad_thickness)

    def build_boundary_map(self, ebunch=None):
        if len(self.merge_queue) == 0:
            self.rebuild_merge_queue()
        m = zeros(self.watershed.shape, double)
        mr = m.ravel()
        if ebunch is None:
            ebunch = self.real_edges_iter()
        ebunch = sorted([(self[u][v]['weight'], u, v) for u, v in ebunch])
        for w, u, v in ebunch:
            b = list(self[u][v]['boundary'])
            mr[b] = w
        if hasattr(self, 'ignored_boundary'):
            m[self.ignored_boundary] = inf
        return morpho.juicy_center(m, self.pad_thickness)

    def remove_obvious_inclusions(self):
        """Merge any nodes with only one edge to their neighbors."""
        for n in self.nodes():
            if self.degree(n) == 1:
                self.merge_nodes(self.neighbors(n)[0], n)

    def remove_inclusions(self):
        """Merge any segments fully contained within other segments."""
        bcc = list(biconnected_components(self))
        container = [i for i, s in enumerate(bcc) if self.boundary_body in s][0]
        del bcc[container] # remove the main graph
        bcc = map(list, bcc)
        for cc in bcc:
            cc.sort(key=lambda x: len(self.node[x]['extent']), reverse=True)
        bcc.sort(key=lambda x: len(self.node[x[0]]['extent']))
        for cc in bcc:
            self.merge_subgraph(cc, cc[0])

    def orphans(self):
        """List of all the nodes that do not touch the volume boundary."""
        return [n for n in self.nodes() if not self.at_volume_boundary(n)]

    def is_traversed_by_node(self, n):
        """Determine whether a body traverses the volume.
        
        This is defined as touching the volume boundary at two distinct 
        locations.
        """
        if not self.at_volume_boundary(n) or n == self.boundary_body:
            return False
        v = zeros(self.segmentation.shape, uint8)
        v.ravel()[list(self[n][self.boundary_body]['boundary'])] = 1
        _, num_components = label(v, ones([3]*v.ndim))
        return num_components > 1

    def traversing_bodies(self):
        """List all bodies that traverse the volume."""
        return [n for n in self.nodes() if self.is_traversed_by_node(n)]

    def non_traversing_bodies(self):
        """List bodies that are not orphans and do not traverse the volume."""
        return [n for n in self.nodes() if self.at_volume_boundary(n) and
            not self.is_traversed_by_node(n)]

    def raveler_body_annotations(self, traverse=False):
        """Return JSON-compatible dict formatted for Raveler annotations."""
        orphans = self.orphans()
        non_traversing_bodies = self.non_traversing_bodies() if traverse else []
        data = \
            [{'status':'not sure', 'comment':'orphan', 'body ID':int(o)}
                for o in orphans] +\
            [{'status':'not sure', 'comment':'does not traverse', 
                'body ID':int(n)} for n in non_traversing_bodies]
        metadata = {'description':'body annotations', 'file version':2}
        return {'data':data, 'metadata':metadata}

    def at_volume_boundary(self, n):
        """Return True if node n touches the volume boundary."""
        return self.has_edge(n, self.boundary_body) or n == self.boundary_body

    def should_merge(self, n1, n2):
        return self.rig[n1].argmax() == self.rig[n2].argmax()

    def get_pixel_label(self, n1, n2):
        boundary = array(list(self[n1][n2]['boundary']))
        min_idx = boundary[self.probabilities_r[boundary,0].argmin()]
        if self.should_merge(n1, n2):
            return min_idx, 2
        else:
            return min_idx, 1

    def pixel_labels_array(self, false_splits_only=False):
        ar = zeros_like(self.watershed_r)
        labels = [self.get_pixel_label(*e) for e in self.real_edges()]
        if false_splits_only:
            labels = [l for l in labels if l[1] == 2]
        ids, ls = map(array,zip(*labels))
        ar[ids] = ls.astype(ar.dtype)
        return ar.reshape(self.watershed.shape)

    def split_vi(self, gt=None):
        if self.gt is None and gt is None:
            return array([0,0])
        elif self.gt is not None:
            return split_vi(None, None, self.rig)
        else:
            return split_vi(self.get_segmentation(), gt, None, [0], [0])

    def boundary_indices(self, n1, n2):
        return list(self[n1][n2]['boundary'])

    def get_edge_coordinates(self, n1, n2, arbitrary=False):
        """Find where in the segmentation the edge (n1, n2) is most visible."""
        return get_edge_coordinates(self, n1, n2, arbitrary)

    def write(self, fout, output_format='GraphML'):
        if output_format == 'Plaza JSON':
            self.write_plaza_json(fout)
        else:
            raise ValueError('Unsupported output format for agglo.Rag: %s'
                % output_format)
        
    def write_plaza_json(self, fout):
        """Write graph to Steve Plaza's JSON spec."""
        edge_list = [
            {'location': map(int, self.get_edge_coordinates(i, j)[-1::-1]),
            'node1': int(i), 'node2': int(j),
            'edge_size': len(self[i][j]['boundary']),
            'size1': len(self.node[i]['extent']), 
            'size2': len(self.node[j]['extent']),
            'weight': float(self[i][j]['weight'])}
            for i, j in self.real_edges()
        ]
        with open(fout, 'w') as f:
            json.dump({'edge_list': edge_list}, f, indent=4)

    def ncut(self, num_clusters=10, kmeans_iters=5, sigma=255.0*20, nodes=None,
            **kwargs):
        """Run normalized cuts on the current set of superpixels.
           Keyword arguments:
               num_clusters -- number of clusters to compute
               kmeans_iters -- number of iterations to run kmeans when clustering
               sigma -- sigma value when setting up weight matrix
           Return value: None
        """
        if nodes is None:
            nodes = self.nodes()
        # Compute weight matrix, forwarding the sigma keyword argument
        W = self.compute_W(self.merge_priority_function, sigma=sigma,
                           nodes=nodes)
        # Run normalized cut
        labels, eigvec, eigval = ncutW(W, num_clusters, kmeans_iters, **kwargs)
        # Merge nodes that are in same cluster
        self.cluster_by_labels(labels, nodes) 
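
    # Hedged usage sketch: cluster the current superpixels into five segments
    # (illustrative call):
    #
    #     g.ncut(num_clusters=5)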
    
    def cluster_by_labels(self, labels, nodes=None):
        """Merge all superpixels sharing the same label (one label per sp)."""
        if nodes is None:
            nodes = array(self.nodes())
        if len(labels) != len(nodes):
            raise ValueError('Number of labels should be %d but is %d.' %
                             (len(nodes), len(labels)))
        for l in unique(labels):
            inds = nonzero(labels==l)[0]
            nodes_to_merge = nodes[inds]
            node1 = nodes_to_merge[0]
            for node in nodes_to_merge[1:]:
                self.merge_nodes(node1,node)
                
    def compute_W(self, merge_priority_function, sigma=255.0*20, nodes=None):
        """ Computes the weight matrix for clustering"""
        if nodes is None:
            nodes = array(self.nodes())
        n = len(nodes)
        nodes2ind = dict(zip(nodes, range(n)))
        W = lil_matrix((n,n))
        for u, v in self.real_edges(nodes):
            try:
                i, j = nodes2ind[u], nodes2ind[v]
            except KeyError:
                continue
            w = merge_priority_function(self,u,v)
            W[i,j] = W[j,i] = exp(-w**2/sigma)
        return W
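
# --- Hedged end-to-end sketch (not part of the original example). It assumes
# this module's dependencies are importable and that watershed and
# probabilities are the first two constructor arguments; array values are
# illustrative. ---
if __name__ == '__main__':
    import numpy as np
    # four basins separated by 0-labeled dams
    ws = np.array([[1, 1, 0, 2, 2],
                   [1, 1, 0, 2, 2],
                   [0, 0, 0, 0, 0],
                   [3, 3, 0, 4, 4]])
    pr = np.random.rand(*ws.shape)   # stand-in boundary probabilities
    g = Rag(ws, pr)                  # one node per basin, plus a boundary body
    g.agglomerate(0.5)               # merge all edges with priority < 0.5
    seg = g.get_segmentation()       # relabeled volume with dams resolved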
Beispiel #20
0
def test_bump_pr():
    mq = MergeQueue(FakeGHRepo())
    mq.ask_pr(12)
    mq.ask_pr(13)
    assert mq.queue[0].nb == 12
    with pytest.raises(MergeQueueException):
        mq.bump_pr(13)
    mq.bless_pr(13)
    mq.bump_pr(13)
    assert mq.queue[0].nb == 13
    assert mq.queue[1].nb == 12
    assert mq.pulled_prs[0] == 13
    with pytest.raises(MergeQueueException):
        mq.bump_pr(14)
Beispiel #21
0
def test_getpr():
    mq = MergeQueue(FakeGHRepo())
    pr, gh_pr = mq.get_pr(12)
    assert pr.nb == 12
    assert gh_pr.number == 12
Beispiel #22
0
def test_set_stats_plugin():
    """Test setting stats plugin"""
    repo = FakeGHRepo()
    mq = MergeQueue(repo)

    assert type(mq.stats) == NoStats
Beispiel #23
0
def test_check_queue_depth():
    pr_14 = FakeGHPullRequest(14,
                              reviews=[FakeGHReview('user1', APPROVED)],
                              mergeable_state=BEHIND)
    pr_15 = FakeGHPullRequest(15,
                              reviews=[FakeGHReview('user2', APPROVED)],
                              mergeable_state=BEHIND)
    pr_16 = FakeGHPullRequest(16,
                              reviews=[FakeGHReview('user2', APPROVED)],
                              mergeable_state=BEHIND)
    repo = FakeGHRepo(injected_prs=[pr_15, pr_14, pr_16])
    mq = MergeQueue(repo, max_pulled_prs=2)
    mq.ask_pr(14)
    mq.ask_pr(15)
    mq.ask_pr(16)
    transitions = list(mq.check())
    assert len(transitions) == 0  # nothing changed so nothing should happen

    mq.bless_pr(14)
    mq.bless_pr(15)
    mq.bless_pr(16)

    # It should pull the base branch only on 14 and 15
    transitions = list(mq.check())
    assert len(transitions) == 2
    for pr, trs in transitions:
        assert trs[0][0] == PRTransition.PULLED
        assert trs[1][0] == PRTransition.PULLED_SUCCESS

    # Let's say the first one worked
    pr_14.mergeable_state = CLEAN
    pr_14.mergeable = True

    # Second time around, it merges the one that is mergeable.
    transitions = list(mq.check())
    assert len(transitions) == 2
    for pr, trs in transitions:
        if pr.nb == 14:
            assert (trs[0][0] == PRTransition.NOW_MERGEABLE
                    and trs[1][0] == PRTransition.MERGING)

    assert pr_14.asked_to_be_merged

    pr_14.merged = True  # PR has been merged
    pr_15.mergeable_state = BLOCKED  # CI still catching up

    # Third time around, as a slot has been freed up, it should pull the last one.
    transitions = list(mq.check())
    assert len(transitions) == 2
    for pr, trs in transitions:
        if pr.nb == 14:
            assert trs[0][0] == PRTransition.MERGED
        elif pr.nb == 16:
            assert (trs[0][0] == PRTransition.PULLED
                    and trs[1][0] == PRTransition.PULLED_SUCCESS)

    assert len(mq.pulled_prs) == 2
    pr_15.mergeable_state = DIRTY
    transitions = list(mq.check())
    assert len(transitions) == 1
    assert len(mq.pulled_prs) == 1
Beispiel #24
0
def test_init():
    mq = MergeQueue(FakeGHRepo())
    assert mq.get_queue() == []