Example #1
0
    def get_labels(self, shape=None):
        '''Return labels matrices in which no pixel carries two labels

        IJV data may assign several labels to one pixel, which a single
        labels matrix cannot represent. The labels are therefore split into
        groups ("colors") so that labels sharing a pixel fall into different
        groups, and one labels matrix is produced per group.

        shape - shape of the matrices built from IJV data. If None, the
                stored shape is used; failing that the parent image's shape,
                (1, 1) when there are no labels, or the largest i, j plus 2.

        returns a list of (labels matrix, indices of the labels in it)
        tuples, or an empty list if there is neither a segmentation nor
        IJV data.
        '''
        if self.__segmented is not None:
            return [(self.__segmented, self.indices)]
        if self.__ijv is None:
            return []

        fallback_shape = self.__shape if shape is None else shape

        def to_matrix(rows, dims=fallback_shape):
            # Paint the (i, j, label) rows into one labels matrix
            if dims is None:
                if self.has_parent_image:
                    dims = self.parent_image.pixel_data.shape
                elif len(rows) == 0:
                    # degenerate case, no parent info and no labels
                    dims = (1, 1)
                else:
                    # largest coordinate + 2 leaves a spare row / column of 0
                    dims = np.max(rows[:, :2], 0) + 2
            matrix = np.zeros(dims, np.int16)
            if rows.shape[0] > 0:
                matrix[rows[:, 0], rows[:, 1]] = rows[:, 2]
            return matrix

        all_ijv = self.__ijv
        if len(all_ijv) == 0:
            return [(to_matrix(all_ijv), self.indices)]

        #
        # Sort by i, then j, then label (lexsort's last key is the primary
        # one) so that rows sharing a pixel become consecutive.
        #
        by_pixel = np.lexsort((all_ijv[:, 2], all_ijv[:, 1], all_ijv[:, 0]))
        sorted_ijv = all_ijv[by_pixel]
        same_as_next = np.all(sorted_ijv[:-1, :2] == sorted_ijv[1:, :2], 1)
        #
        # Index of the first row of every pixel, plus a sentinel at the end;
        # successive differences give the number of labels at each pixel.
        #
        run_starts = np.argwhere(
            np.hstack(([True], ~same_as_next, [True]))).flatten()
        run_lengths = run_starts[1:] - run_starts[:-1]
        run_of_row = Indexes(run_lengths).rev_idx
        #
        # Only pixels that carry more than one label matter from here on
        #
        sorted_ijv = sorted_ijv[run_lengths[run_of_row] > 1, :]
        run_lengths = run_lengths[run_lengths > 1]
        if len(run_lengths) == 0:
            return [(to_matrix(all_ijv), self.indices)]

        #
        # A pixel with n labels contributes n * (n - 1) ordered label pairs
        # (n = 2 -> 2 pairs, n = 3 -> 6 pairs)
        #
        pair_offsets = all_pairs(np.max(run_lengths))
        pairs_per_run = run_lengths * (run_lengths - 1)
        row_indexer = Indexes(run_lengths)
        pair_indexer = Indexes(pairs_per_run)
        run_base = row_indexer.fwd_idx[pair_indexer.rev_idx]
        pair_number = pair_indexer.idx[0]
        first = sorted_ijv[run_base + pair_offsets[pair_number, 0], 2]
        second = sorted_ijv[run_base + pair_offsets[pair_number, 1], 2]
        #
        # Order the pairs by (first, second) so that each label's partners
        # are contiguous, then drop repeats of the preceding pair
        #
        pair_order = np.lexsort((second, first))
        first, second = first[pair_order], second[pair_order]
        repeats_previous = ((first[1:] == first[:-1]) &
                            (second[1:] == second[:-1]))
        is_new = np.hstack(([True], ~repeats_previous))
        first, second = first[is_new], second[is_new]
        #
        # Number of distinct overlapping partners per label, padded with
        # zeros so there is an entry for label 0 through the last index.
        # See cpmorphology.color_labels and
        # Welsh, "An upper bound for the chromatic number of a graph and
        # its application to timetabling problems", The Computer Journal, 10(1)
        # p 85 (1967)
        #
        n_partners = np.bincount(first)
        n_slots = len(self.indices) + 1
        if len(n_partners) < n_slots:
            n_partners = np.hstack(
                (n_partners, [0] * (n_slots - len(n_partners))))
        #
        # Where each label's run of partners starts within "second"
        #
        partner_start = np.cumsum(n_partners) - n_partners
        #
        # Color per label: 0 = not yet assigned, 1 for labels without any
        # overlap, -1 for labels whose area is zero
        #
        color_of = np.zeros(len(n_partners), int)
        color_of[n_partners == 0] = 1
        color_of[1:][self.areas == 0] = -1
        #
        # Visit the overlapping labels in ascending order of partner count
        # (lexsort sorts ascending), ties broken by label number
        #
        visit_order = np.lexsort((np.arange(len(n_partners)), n_partners))
        visit_order = visit_order[n_partners[visit_order] > 0]

        for label in visit_order:
            start = partner_start[label]
            partners = second[start:start + n_partners[label]]
            used = np.unique(color_of[partners])
            if used[0] == 0:
                if len(used) == 1:
                    # no partner has a color yet - take color 1
                    color_of[label] = 1
                    continue
                # disregard the partners that are still unassigned
                used = used[1:]
            # "used" is sorted, so the first position at which it departs
            # from 1, 2, 3... is the lowest free color
            candidates = np.arange(1, len(used) + 1)
            free = candidates[used != candidates]
            color_of[label] = free[0] if len(free) else len(used) + 1
        #
        # One labels matrix per color, skipping the absent (-1) labels
        #
        row_labels = all_ijv[:, 2]
        label_numbers = np.arange(1, len(color_of))
        return [(to_matrix(all_ijv[color_of[row_labels] == color]),
                 label_numbers[color_of[1:] == color])
                for color in np.unique(color_of) if color != -1]
Example #2
0
    def __convert_sparse_to_dense(self):
        '''Build the dense, plane-per-color labeling from the sparse records

        Sparse records may put several labels on one location. Each label is
        given a "color" (a plane number starting at 1) by greedy graph
        coloring so that labels sharing a location get different colors;
        every label is then written into the plane for its color.

        returns the result of self.__set_or_cache_dense, which is handed the
        dense array (leading axis = color plane, remaining axes = self.shape)
        and, when overlaps exist, a list holding the label numbers stored in
        each plane.
        '''
        from cellprofiler.utilities.hdf5_dict import HDF5ObjectSet
        sparse = self.get_sparse()
        if len(sparse) == 0:
            return self.__set_or_cache_dense(
                np.zeros([1] + list(self.shape), np.uint16))

        #
        # Collect the coordinate columns. positional_columns has one entry
        # per axis (0 for an axis missing from the sparse records) and is
        # used to address the dense array; available_columns holds only the
        # axes that are present.
        #
        positional_columns = []
        available_columns = []
        for axis in HDF5ObjectSet.AXES:
            if axis in sparse.dtype.fields:
                positional_columns.append(sparse[axis])
                available_columns.append(sparse[axis])
            else:
                positional_columns.append(0)
        labels = sparse[HDF5ObjectSet.AXIS_LABELS]
        #
        # np.lexsort uses its last key as the primary one, so the keys are
        # the label (least significant) followed by the axes in reverse
        #
        sort_order = np.lexsort([labels] + available_columns[::-1])
        # Plain int so that "+ 1" cannot overflow a narrow label dtype
        n_labels = int(np.max(labels))
        #
        # Find the first of a run that's different from the rest
        #
        mask = available_columns[0][sort_order[:-1]] != \
            available_columns[0][sort_order[1:]]
        for column in available_columns[1:]:
            mask = mask | (column[sort_order[:-1]] != column[sort_order[1:]])
        breaks = np.hstack(([0], np.where(mask)[0] + 1, [len(labels)]))
        firsts = breaks[:-1]
        counts = breaks[1:] - firsts
        #
        # Eliminate the locations that are singly labeled
        #
        mask = counts > 1
        firsts = firsts[mask]
        counts = counts[mask]
        if len(counts) == 0:
            # No overlap anywhere: everything fits in a single plane.
            # The index must be a tuple; NumPy no longer accepts a list
            # as a multidimensional index.
            dense = np.zeros([1] + list(self.shape), labels.dtype)
            dense[tuple([0] + positional_columns)] = labels
            return self.__set_or_cache_dense(dense)
        #
        # There are n * n-1 pairs for each coordinate (n = # labels)
        # n = 1 -> 0 pairs, n = 2 -> 2 pairs, n = 3 -> 6 pairs
        #
        pairs = all_pairs(np.max(counts))
        pair_counts = counts * (counts - 1)
        #
        # Remember idx points into sort_order which points into labels
        # to get the nth label, grouped into consecutive positions.
        #
        output_indexer = Indexes(pair_counts)
        #
        # The start of the run of overlaps and the offsets
        #
        run_starts = firsts[output_indexer.rev_idx]
        offs = pairs[output_indexer.idx[0], :]
        first = labels[sort_order[run_starts + offs[:, 0]]]
        second = labels[sort_order[run_starts + offs[:, 1]]]
        #
        # Sort the pairs so that each label's partners are consecutive.
        # The sort has to happen BEFORE duplicates are detected: the
        # duplicate test compares each pair with its predecessor and the
        # resulting mask must line up with the arrays it is applied to.
        #
        pair_sort_order = np.lexsort((second, first))
        first = first[pair_sort_order]
        second = second[pair_sort_order]
        #
        # Eliminate dupes and pairs of a label with itself
        #
        to_keep = np.hstack(
            ([True], (first[1:] != first[:-1]) | (second[1:] != second[:-1])))
        to_keep &= (first != second)
        first = first[to_keep]
        second = second[to_keep]
        #
        # Bincount each label to get its number of overlapping partners.
        # See cpmorphology.color_labels and
        # Welsh, "An upper bound for the chromatic number of a graph and
        # its application to timetabling problems", The Computer Journal, 10(1)
        # p 85 (1967)
        #
        overlap_counts = np.bincount(first.astype(np.int32))
        #
        # The index to the i'th label's stuff
        #
        indexes = np.cumsum(overlap_counts) - overlap_counts
        #
        # A vector of a current color per label. All non-overlapping
        # objects are assigned to plane 1
        #
        v_color = np.ones(n_labels + 1, int)
        v_color[0] = 0
        #
        # Clear all overlapping objects (0 = not yet assigned)
        #
        v_color[np.unique(first)] = 0
        #
        # np.lexsort sorts ascending, so labels are processed from the
        # fewest overlaps to the most, ties broken by label number.
        # NOTE(review): the cited heuristic orders by descending degree;
        # the existing order is kept so plane assignments do not change.
        #
        ol_labels = np.where(overlap_counts > 0)[0]
        processing_order = np.lexsort((ol_labels, overlap_counts[ol_labels]))

        for index in ol_labels[processing_order]:
            neighbors = second[indexes[index]:indexes[index] +
                               overlap_counts[index]]
            colors = np.unique(v_color[neighbors])
            if colors[0] == 0:
                if len(colors) == 1:
                    # all unassigned - put self in group 1
                    v_color[index] = 1
                    continue
                # otherwise, ignore the unprocessed group and continue
                colors = colors[1:]
            # Match a range against the colors array - the first place
            # they don't match is the first color we can use
            crange = np.arange(1, len(colors) + 1)
            misses = crange[colors != crange]
            v_color[index] = misses[0] if len(misses) else len(colors) + 1
        #
        # Create the dense matrix by using the color (minus one) as the
        # leading index of the plane into which each label is placed
        #
        dense = np.zeros([np.max(v_color)] + list(self.shape), labels.dtype)
        slices = tuple([v_color[labels] - 1] + positional_columns)
        dense[slices] = labels
        indices = [
            np.where(v_color == i)[0] for i in range(1, dense.shape[0] + 1)
        ]

        return self.__set_or_cache_dense(dense, indices)
Example #3
0
 def get_labels(self, shape=None):
     '''Return labels matrices in which no pixel carries two labels

     IJV data may assign several labels to one pixel, which a single
     labels matrix cannot represent. The labels are therefore split into
     groups ("colors") so that labels sharing a pixel fall into different
     groups, and one labels matrix is produced per group.

     shape - shape of the matrices built from IJV data. If None, the
             stored shape is used; failing that the parent image's shape,
             (1, 1) when there are no labels, or the largest i, j plus 2.

     returns a list of (labels matrix, indices of the labels in it)
     tuples, or an empty list if there is neither a segmentation nor
     IJV data.
     '''
     if self.__segmented is not None:
         return [(self.__segmented, self.indices)]
     if self.__ijv is None:
         return []

     fallback_shape = self.__shape if shape is None else shape

     def to_matrix(rows, dims=fallback_shape):
         # Paint the (i, j, label) rows into one labels matrix
         if dims is None:
             if self.has_parent_image:
                 dims = self.parent_image.pixel_data.shape
             elif len(rows) == 0:
                 # degenerate case, no parent info and no labels
                 dims = (1, 1)
             else:
                 # largest coordinate + 2 leaves a spare row / column of 0
                 dims = np.max(rows[:, :2], 0) + 2
         matrix = np.zeros(dims, np.int16)
         if rows.shape[0] > 0:
             matrix[rows[:, 0], rows[:, 1]] = rows[:, 2]
         return matrix

     all_ijv = self.__ijv
     if len(all_ijv) == 0:
         return [(to_matrix(all_ijv), self.indices)]

     #
     # Sort by i, then j, then label (lexsort's last key is the primary
     # one) so that rows sharing a pixel become consecutive.
     #
     by_pixel = np.lexsort((all_ijv[:, 2], all_ijv[:, 1], all_ijv[:, 0]))
     sorted_ijv = all_ijv[by_pixel]
     same_as_next = np.all(sorted_ijv[:-1, :2] == sorted_ijv[1:, :2], 1)
     #
     # Index of the first row of every pixel, plus a sentinel at the end;
     # successive differences give the number of labels at each pixel.
     #
     run_starts = np.argwhere(
         np.hstack(([True], ~same_as_next, [True]))).flatten()
     run_lengths = run_starts[1:] - run_starts[:-1]
     run_of_row = Indexes(run_lengths).rev_idx
     #
     # Only pixels that carry more than one label matter from here on
     #
     sorted_ijv = sorted_ijv[run_lengths[run_of_row] > 1, :]
     run_lengths = run_lengths[run_lengths > 1]
     if len(run_lengths) == 0:
         return [(to_matrix(all_ijv), self.indices)]

     #
     # A pixel with n labels contributes n * (n - 1) ordered label pairs
     # (n = 2 -> 2 pairs, n = 3 -> 6 pairs)
     #
     pair_offsets = all_pairs(np.max(run_lengths))
     pairs_per_run = run_lengths * (run_lengths - 1)
     row_indexer = Indexes(run_lengths)
     pair_indexer = Indexes(pairs_per_run)
     run_base = row_indexer.fwd_idx[pair_indexer.rev_idx]
     pair_number = pair_indexer.idx[0]
     first = sorted_ijv[run_base + pair_offsets[pair_number, 0], 2]
     second = sorted_ijv[run_base + pair_offsets[pair_number, 1], 2]
     #
     # Order the pairs by (first, second) so that each label's partners
     # are contiguous, then drop repeats of the preceding pair
     #
     pair_order = np.lexsort((second, first))
     first, second = first[pair_order], second[pair_order]
     repeats_previous = ((first[1:] == first[:-1]) &
                         (second[1:] == second[:-1]))
     is_new = np.hstack(([True], ~repeats_previous))
     first, second = first[is_new], second[is_new]
     #
     # Number of distinct overlapping partners per label, padded with
     # zeros so there is an entry for label 0 through the last index.
     # See cpmorphology.color_labels and
     # Welsh, "An upper bound for the chromatic number of a graph and
     # its application to timetabling problems", The Computer Journal, 10(1)
     # p 85 (1967)
     #
     n_partners = np.bincount(first)
     n_slots = len(self.indices) + 1
     if len(n_partners) < n_slots:
         n_partners = np.hstack(
             (n_partners, [0] * (n_slots - len(n_partners))))
     #
     # Where each label's run of partners starts within "second"
     #
     partner_start = np.cumsum(n_partners) - n_partners
     #
     # Color per label: 0 = not yet assigned, 1 for labels without any
     # overlap, -1 for labels whose area is zero
     #
     color_of = np.zeros(len(n_partners), int)
     color_of[n_partners == 0] = 1
     color_of[1:][self.areas == 0] = -1
     #
     # Visit the overlapping labels in ascending order of partner count
     # (lexsort sorts ascending), ties broken by label number
     #
     visit_order = np.lexsort((np.arange(len(n_partners)), n_partners))
     visit_order = visit_order[n_partners[visit_order] > 0]

     for label in visit_order:
         start = partner_start[label]
         partners = second[start:start + n_partners[label]]
         used = np.unique(color_of[partners])
         if used[0] == 0:
             if len(used) == 1:
                 # no partner has a color yet - take color 1
                 color_of[label] = 1
                 continue
             # disregard the partners that are still unassigned
             used = used[1:]
         # "used" is sorted, so the first position at which it departs
         # from 1, 2, 3... is the lowest free color
         candidates = np.arange(1, len(used) + 1)
         free = candidates[used != candidates]
         color_of[label] = free[0] if len(free) else len(used) + 1
     #
     # One labels matrix per color, skipping the absent (-1) labels
     #
     row_labels = all_ijv[:, 2]
     label_numbers = np.arange(1, len(color_of))
     return [(to_matrix(all_ijv[color_of[row_labels] == color]),
              label_numbers[color_of[1:] == color])
             for color in np.unique(color_of) if color != -1]
Example #4
0
    def __convert_sparse_to_dense(self):
        '''Build the dense, plane-per-color labeling from the sparse records

        Sparse records may put several labels on one location. Each label is
        given a "color" (a plane number starting at 1) by greedy graph
        coloring so that labels sharing a location get different colors;
        every label is then written into the plane for its color.

        returns the result of self.__set_or_cache_dense, which is handed the
        dense array (leading axis = color plane, remaining axes = self.shape)
        and, when overlaps exist, a list holding the label numbers stored in
        each plane.
        '''
        from cellprofiler.utilities.hdf5_dict import HDF5ObjectSet
        sparse = self.get_sparse()
        if len(sparse) == 0:
            return self.__set_or_cache_dense(
                np.zeros([1] + list(self.shape), np.uint16))

        #
        # Collect the coordinate columns. positional_columns has one entry
        # per axis (0 for an axis missing from the sparse records) and is
        # used to address the dense array; available_columns holds only the
        # axes that are present.
        #
        positional_columns = []
        available_columns = []
        for axis in HDF5ObjectSet.AXES:
            if axis in sparse.dtype.fields:
                positional_columns.append(sparse[axis])
                available_columns.append(sparse[axis])
            else:
                positional_columns.append(0)
        labels = sparse[HDF5ObjectSet.AXIS_LABELS]
        #
        # np.lexsort uses its last key as the primary one, so the keys are
        # the label (least significant) followed by the axes in reverse
        #
        sort_order = np.lexsort([labels] + available_columns[::-1])
        # Plain int so that "+ 1" cannot overflow a narrow label dtype
        n_labels = int(np.max(labels))
        #
        # Find the first of a run that's different from the rest
        #
        mask = available_columns[0][sort_order[:-1]] != \
            available_columns[0][sort_order[1:]]
        for column in available_columns[1:]:
            mask = mask | (column[sort_order[:-1]] != column[sort_order[1:]])
        breaks = np.hstack(([0], np.where(mask)[0] + 1, [len(labels)]))
        firsts = breaks[:-1]
        counts = breaks[1:] - firsts
        #
        # Eliminate the locations that are singly labeled
        #
        mask = counts > 1
        firsts = firsts[mask]
        counts = counts[mask]
        if len(counts) == 0:
            # No overlap anywhere: everything fits in a single plane.
            # The index must be a tuple; NumPy no longer accepts a list
            # as a multidimensional index.
            dense = np.zeros([1] + list(self.shape), labels.dtype)
            dense[tuple([0] + positional_columns)] = labels
            return self.__set_or_cache_dense(dense)
        #
        # There are n * n-1 pairs for each coordinate (n = # labels)
        # n = 1 -> 0 pairs, n = 2 -> 2 pairs, n = 3 -> 6 pairs
        #
        pairs = all_pairs(np.max(counts))
        pair_counts = counts * (counts - 1)
        #
        # Remember idx points into sort_order which points into labels
        # to get the nth label, grouped into consecutive positions.
        #
        output_indexer = Indexes(pair_counts)
        #
        # The start of the run of overlaps and the offsets
        #
        run_starts = firsts[output_indexer.rev_idx]
        offs = pairs[output_indexer.idx[0], :]
        first = labels[sort_order[run_starts + offs[:, 0]]]
        second = labels[sort_order[run_starts + offs[:, 1]]]
        #
        # Sort the pairs so that each label's partners are consecutive.
        # The sort has to happen BEFORE duplicates are detected: the
        # duplicate test compares each pair with its predecessor and the
        # resulting mask must line up with the arrays it is applied to.
        #
        pair_sort_order = np.lexsort((second, first))
        first = first[pair_sort_order]
        second = second[pair_sort_order]
        #
        # Eliminate dupes and pairs of a label with itself
        #
        to_keep = np.hstack(
            ([True], (first[1:] != first[:-1]) | (second[1:] != second[:-1])))
        to_keep &= (first != second)
        first = first[to_keep]
        second = second[to_keep]
        #
        # Bincount each label to get its number of overlapping partners.
        # See cpmorphology.color_labels and
        # Welsh, "An upper bound for the chromatic number of a graph and
        # its application to timetabling problems", The Computer Journal, 10(1)
        # p 85 (1967)
        #
        overlap_counts = np.bincount(first.astype(np.int32))
        #
        # The index to the i'th label's stuff
        #
        indexes = np.cumsum(overlap_counts) - overlap_counts
        #
        # A vector of a current color per label. All non-overlapping
        # objects are assigned to plane 1
        #
        v_color = np.ones(n_labels + 1, int)
        v_color[0] = 0
        #
        # Clear all overlapping objects (0 = not yet assigned)
        #
        v_color[np.unique(first)] = 0
        #
        # np.lexsort sorts ascending, so labels are processed from the
        # fewest overlaps to the most, ties broken by label number.
        # NOTE(review): the cited heuristic orders by descending degree;
        # the existing order is kept so plane assignments do not change.
        #
        ol_labels = np.where(overlap_counts > 0)[0]
        processing_order = np.lexsort((ol_labels, overlap_counts[ol_labels]))

        for index in ol_labels[processing_order]:
            neighbors = second[indexes[index]:indexes[index] +
                               overlap_counts[index]]
            colors = np.unique(v_color[neighbors])
            if colors[0] == 0:
                if len(colors) == 1:
                    # all unassigned - put self in group 1
                    v_color[index] = 1
                    continue
                # otherwise, ignore the unprocessed group and continue
                colors = colors[1:]
            # Match a range against the colors array - the first place
            # they don't match is the first color we can use
            crange = np.arange(1, len(colors) + 1)
            misses = crange[colors != crange]
            v_color[index] = misses[0] if len(misses) else len(colors) + 1
        #
        # Create the dense matrix by using the color (minus one) as the
        # leading index of the plane into which each label is placed
        #
        dense = np.zeros([np.max(v_color)] + list(self.shape), labels.dtype)
        slices = tuple([v_color[labels] - 1] + positional_columns)
        dense[slices] = labels
        indices = [
            np.where(v_color == i)[0] for i in range(1, dense.shape[0] + 1)
        ]

        return self.__set_or_cache_dense(dense, indices)