def compute_rand_index_ijv(self, gt_ijv, test_ijv, shape):
    '''Compute the Rand Index for an IJV matrix

    This is in part based on the Omega Index:
    Collins, "Omega: A General Formulation of the Rand Index of Cluster
    Recovery Suitable for Non-disjoint Solutions", Multivariate Behavioral
    Research, 1988, 23, 231-242

    The basic idea of the paper is that a pair should be judged to agree
    only if the number of clusters in which they appear together is the
    same.
    '''
    #
    # The idea here is to assign a label to every pixel position based
    # on the set of labels given to that position by both the ground
    # truth and the test set. We then assess each pair of labels
    # as agreeing or disagreeing as to the number of matches.
    #
    # First, add the backgrounds to the IJV with a label of zero
    #
    gt_bkgd = np.ones(shape, bool)
    gt_bkgd[gt_ijv[:, 0], gt_ijv[:, 1]] = False
    test_bkgd = np.ones(shape, bool)
    test_bkgd[test_ijv[:, 0], test_ijv[:, 1]] = False
    gt_ijv = np.vstack([
        gt_ijv,
        np.column_stack([np.argwhere(gt_bkgd),
                         np.zeros(np.sum(gt_bkgd), gt_bkgd.dtype)])])
    test_ijv = np.vstack([
        test_ijv,
        np.column_stack([np.argwhere(test_bkgd),
                         np.zeros(np.sum(test_bkgd), test_bkgd.dtype)])])
    #
    # Create a unified structure for the pixels where a fourth column
    # tells you whether the pixels came from the ground-truth or test
    #
    u = np.vstack([
        np.column_stack([gt_ijv, np.zeros(gt_ijv.shape[0], gt_ijv.dtype)]),
        np.column_stack([test_ijv, np.ones(test_ijv.shape[0],
                                           test_ijv.dtype)])])
    #
    # Sort by coordinates, then by identity
    #
    order = np.lexsort([u[:, 2], u[:, 3], u[:, 0], u[:, 1]])
    u = u[order, :]
    #
    # Get rid of any duplicate labelings (the same point labeled twice
    # with the same label)
    #
    first = np.hstack([[True], np.any(u[:-1, :] != u[1:, :], 1)])
    u = u[first, :]
    #
    # Create a 1-d indexer to point at each unique coordinate
    #
    first_coord_idxs = np.hstack([
        [0],
        np.argwhere((u[:-1, 0] != u[1:, 0]) |
                    (u[:-1, 1] != u[1:, 1])).flatten() + 1,
        [u.shape[0]]])
    first_coord_counts = first_coord_idxs[1:] - first_coord_idxs[:-1]
    indexes = Indexes([first_coord_counts])
    #
    # Count the number of labels at each point for both gt and test
    #
    count_test = np.bincount(indexes.rev_idx, u[:, 3]).astype(np.int64)
    count_gt = first_coord_counts - count_test
    #
    # For each # of labels, pull out the coordinates that have
    # that many labels. Count the number of similarly labeled coordinates
    # and record the count and labels for that group.
    #
    labels = []
    for i in range(1, np.max(count_test) + 1):
        for j in range(1, np.max(count_gt) + 1):
            match = ((count_test[indexes.rev_idx] == i) &
                     (count_gt[indexes.rev_idx] == j))
            if not np.any(match):
                continue
            #
            # Arrange into an array where the rows are coordinates
            # and the columns are the labels for that coordinate
            #
            lm = u[match, 2].reshape(np.sum(match) // (i + j), i + j)
            #
            # Sort by label
            #
            order = np.lexsort(lm.transpose())
            lm = lm[order, :]
            #
            # Find indices of the unique rows and the count of each
            #
            lm_first = np.hstack([
                [0],
                np.argwhere(np.any(lm[:-1, :] != lm[1:, :],
                                   1)).flatten() + 1,
                [lm.shape[0]]])
            lm_count = lm_first[1:] - lm_first[:-1]
            for idx, count in zip(lm_first[:-1], lm_count):
                labels.append((count, lm[idx, :j], lm[idx, j:]))
    #
    # We now have our sets partitioned. Do each against each to get
    # the number of true positive and negative pairs.
    # (On Python 3, reduce must be imported from functools.)
    #
    max_t_labels = reduce(max, [len(t) for c, t, g in labels], 0)
    max_g_labels = reduce(max, [len(g) for c, t, g in labels], 0)
    #
    # tbl is the contingency table from Table 4 of the Collins paper:
    # a table of the number of pairs which fall into M sets in the
    # ground truth case and N sets in the test case.
    #
    tbl = np.zeros((max_t_labels + 1, max_g_labels + 1))
    for i, (c1, tobject_numbers1, gobject_numbers1) in enumerate(labels):
        for j, (c2, tobject_numbers2, gobject_numbers2) in \
                enumerate(labels[i:]):
            nhits_test = np.sum(
                tobject_numbers1[:, np.newaxis] ==
                tobject_numbers2[np.newaxis, :])
            nhits_gt = np.sum(
                gobject_numbers1[:, np.newaxis] ==
                gobject_numbers2[np.newaxis, :])
            if j == 0:
                N = c1 * (c1 - 1) // 2
            else:
                N = c1 * c2
            tbl[nhits_test, nhits_gt] += N
    N = np.sum(tbl)
    #
    # Equation 13 from the paper
    #
    min_JK = min(max_t_labels, max_g_labels) + 1
    rand_index = np.sum(tbl[:min_JK, :min_JK] * np.identity(min_JK)) / N
    #
    # Equation 15 from the paper, the expected index
    #
    e_omega = np.sum(np.sum(tbl[:min_JK, :min_JK], 0) *
                     np.sum(tbl[:min_JK, :min_JK], 1)) / N ** 2
    #
    # Equation 16 is the adjusted index
    #
    adjusted_rand_index = (rand_index - e_omega) / (1 - e_omega)
    return rand_index, adjusted_rand_index
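
#
# Illustrative sketch, not part of this module: the classic pair-counting
# Rand index for two *disjoint* labelings of the same pixels, which is the
# special case that compute_rand_index_ijv above generalizes to overlapping
# IJV labelings. The helper name _classic_rand_index is hypothetical and
# the contingency-table shortcut is the standard formulation, not
# CellProfiler code.
#
def _classic_rand_index(labels_a, labels_b):
    '''Rand index of two disjoint labelings, by pair counting

    labels_a, labels_b - 1-d non-negative integer label arrays,
                         one entry per pixel
    '''
    assert len(labels_a) == len(labels_b)
    n = len(labels_a)
    # contingency[i, j] = # of pixels labeled i in labels_a, j in labels_b
    contingency = np.zeros((np.max(labels_a) + 1, np.max(labels_b) + 1))
    np.add.at(contingency, (labels_a, labels_b), 1)
    together_both = np.sum(contingency * (contingency - 1)) / 2
    row_sums = np.sum(contingency, 1)
    col_sums = np.sum(contingency, 0)
    together_a = np.sum(row_sums * (row_sums - 1)) / 2
    together_b = np.sum(col_sums * (col_sums - 1)) / 2
    total_pairs = n * (n - 1) / 2
    # agreeing pairs = pairs together in both labelings + apart in both
    return (total_pairs + 2 * together_both -
            together_a - together_b) / total_pairs
# e.g. _classic_rand_index(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0]))
# returns 1.0: both labelings split the pixels into the same two clusters.
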
def get_labels(self, shape=None):
    '''Get a set of labels matrices consisting of non-overlapping labels

    In IJV format, a single pixel might have multiple labels. If you
    want to use a labels matrix, you have an ambiguous situation and the
    resolution is to process separate labels matrices consisting of
    non-overlapping labels.

    returns a list of labels matrices and the indexes in each
    '''
    if self.__segmented is not None:
        return [(self.__segmented, self.indices)]
    elif self.__ijv is not None:
        if shape is None:
            shape = self.__shape

        def ijv_to_segmented(ijv, shape=shape):
            if shape is None:
                if self.has_parent_image:
                    shape = self.parent_image.pixel_data.shape
                elif len(ijv) == 0:
                    # degenerate case, no parent info and no labels
                    shape = (1, 1)
                else:
                    # add a border of background to the bottom and right
                    shape = np.max(ijv[:, :2], 0) + 2
            labels = np.zeros(shape, np.int16)
            if ijv.shape[0] > 0:
                labels[ijv[:, 0], ijv[:, 1]] = ijv[:, 2]
            return labels

        if len(self.__ijv) == 0:
            return [(ijv_to_segmented(self.__ijv), self.indices)]
        sort_order = np.lexsort(
            (self.__ijv[:, 2], self.__ijv[:, 1], self.__ijv[:, 0]))
        sijv = self.__ijv[sort_order]
        #
        # Locations in the sorted array where consecutive entries share
        # the same i, j are locations that have an overlap.
        #
        overlap = np.all(sijv[:-1, :2] == sijv[1:, :2], 1)
        #
        # Find the # of labels at each location by finding the index of
        # the first example of a location, then subtracting successive
        # indexes
        #
        firsts = np.argwhere(
            np.hstack(([True], ~overlap, [True]))).flatten()
        counts = firsts[1:] - firsts[:-1]
        indexer = Indexes(counts)
        #
        # Eliminate the locations that are singly labeled
        #
        sijv = sijv[counts[indexer.rev_idx] > 1, :]
        counts = counts[counts > 1]
        if len(counts) == 0:
            return [(ijv_to_segmented(self.__ijv), self.indices)]
        #
        # There are n * (n - 1) ordered pairs for each coordinate
        # (n = # of labels): n = 1 -> 0 pairs, n = 2 -> 2 pairs,
        # n = 3 -> 6 pairs
        #
        pairs = all_pairs(np.max(counts))
        pair_counts = counts * (counts - 1)
        #
        # Create an indexer for the inputs (sijv) and for the outputs
        # (first and second of the pairs)
        #
        input_indexer = Indexes(counts)
        output_indexer = Indexes(pair_counts)
        first = sijv[input_indexer.fwd_idx[output_indexer.rev_idx] +
                     pairs[output_indexer.idx[0], 0], 2]
        second = sijv[input_indexer.fwd_idx[output_indexer.rev_idx] +
                      pairs[output_indexer.idx[0], 1], 2]
        #
        # And sort these so that we get consecutive lists for each
        #
        sort_order = np.lexsort((second, first))
        first = first[sort_order]
        second = second[sort_order]
        #
        # Eliminate dupes
        #
        to_keep = np.hstack(
            ([True],
             (first[1:] != first[:-1]) | (second[1:] != second[:-1])))
        first = first[to_keep]
        second = second[to_keep]
        #
        # Bincount each label so we can find the ones that have the
        # most overlap. See cpmorphology.color_labels and
        # Welsh, "An upper bound for the chromatic number of a graph
        # and its application to timetabling problems", The Computer
        # Journal, 10(1) p 85 (1967)
        #
        overlap_counts = np.bincount(first)
        nlabels = len(self.indices)
        if len(overlap_counts) < nlabels + 1:
            overlap_counts = np.hstack(
                (overlap_counts,
                 [0] * (nlabels - len(overlap_counts) + 1)))
        #
        # The index to the i'th label's stuff
        #
        indexes = np.cumsum(overlap_counts) - overlap_counts
        #
        # A vector of the current color per label
        #
        v_color = np.zeros(len(overlap_counts), int)
        #
        # Assign all non-overlapping labels to color 1
        #
        v_color[overlap_counts == 0] = 1
        #
        # Assign all absent objects to color -1
        #
        v_color[1:][self.areas == 0] = -1
        #
        # Process the overlapping labels in order of increasing
        # overlap count
        #
        processing_order = np.lexsort(
            (np.arange(len(overlap_counts)), overlap_counts))
        processing_order = processing_order[
            overlap_counts[processing_order] > 0]

        for index in processing_order:
            neighbors = second[
                indexes[index]:indexes[index] + overlap_counts[index]]
            colors = np.unique(v_color[neighbors])
            if colors[0] == 0:
                if len(colors) == 1:
                    # all neighbors unassigned - put self in color 1
                    v_color[index] = 1
                    continue
                else:
                    # otherwise, ignore the unassigned group and continue
                    colors = colors[1:]
            #
            # Match a range against the colors array - the first place
            # they don't match is the first color we can use
            #
            crange = np.arange(1, len(colors) + 1)
            misses = crange[colors != crange]
            if len(misses):
                color = misses[0]
            else:
                color = len(colors) + 1
            v_color[index] = color
        #
        # Now, get ijv groups by color
        #
        result = []
        for color in np.unique(v_color):
            if color == -1:
                continue
            ijv = self.__ijv[v_color[self.__ijv[:, 2]] == color]
            indices = np.arange(1, len(v_color))[v_color[1:] == color]
            result.append((ijv_to_segmented(ijv), indices))
        return result
    else:
        return []
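
#
# Illustrative sketch, not part of this module: the greedy graph coloring
# that get_labels performs, restated with an explicit adjacency list.
# Given "label a overlaps label b" pairs, each label receives the smallest
# color not used by an already-colored neighbor, so all labels sharing a
# color can live together in one non-overlapping labels matrix. The helper
# name _greedy_color_overlaps is hypothetical; it colors most-entangled
# labels first (Welsh-Powell style), whereas the method above processes in
# increasing overlap order - any order yields a valid coloring, the order
# only affects how many colors (matrices) are needed.
#
def _greedy_color_overlaps(n_labels, overlap_pairs):
    '''Assign a color (1..k) to each label so overlapping labels differ

    n_labels - labels are numbered 1..n_labels
    overlap_pairs - sequence of (a, b) pairs of labels that overlap
    '''
    neighbors = dict((i, set()) for i in range(1, n_labels + 1))
    for a, b in overlap_pairs:
        neighbors[a].add(b)
        neighbors[b].add(a)
    color = {}
    # color the most entangled labels first
    for label in sorted(neighbors, key=lambda i: -len(neighbors[i])):
        used = set(color[n] for n in neighbors[label] if n in color)
        c = 1
        while c in used:
            c += 1
        color[label] = c
    return color
# e.g. _greedy_color_overlaps(3, [(1, 2), (2, 3)]) colors label 2 with 1
# and labels 1 and 3 with 2, so 1 and 3 can share one labels matrix.
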
def measure_objects(self, workspace):
    image_set = workspace.image_set
    object_name_GT = self.object_name_GT.value
    objects_GT = workspace.get_objects(object_name_GT)
    iGT, jGT, lGT = objects_GT.ijv.transpose()
    object_name_ID = self.object_name_ID.value
    objects_ID = workspace.get_objects(object_name_ID)
    iID, jID, lID = objects_ID.ijv.transpose()
    ID_obj = 0 if len(lID) == 0 else max(lID)
    GT_obj = 0 if len(lGT) == 0 else max(lGT)

    xGT, yGT = objects_GT.shape
    xID, yID = objects_ID.shape
    GT_pixels = np.zeros((xGT, yGT))
    ID_pixels = np.zeros((xID, yID))
    total_pixels = xGT * yGT

    GT_pixels[iGT, jGT] = 1
    ID_pixels[iID, jID] = 1

    GT_tot_area = len(iGT)
    if len(iGT) == 0 and len(iID) == 0:
        intersect_matrix = np.zeros((0, 0), int)
    else:
        #
        # Build a matrix with rows of i, j, label and a GT/ID flag
        #
        all_ijv = np.column_stack(
            (np.hstack((iGT, iID)),
             np.hstack((jGT, jID)),
             np.hstack((lGT, lID)),
             np.hstack((np.zeros(len(iGT)), np.ones(len(iID))))))
        #
        # Order it so that runs of the same i, j are consecutive
        #
        order = np.lexsort((all_ijv[:, -1], all_ijv[:, 0],
                            all_ijv[:, 1]))
        all_ijv = all_ijv[order, :]
        # Mark the first at each i, j != previous i, j
        first = np.where(np.hstack(
            ([True],
             ~np.all(all_ijv[:-1, :2] == all_ijv[1:, :2], 1),
             [True])))[0]
        # Count # at each i, j
        count = first[1:] - first[:-1]
        # First indexer - mapping from i, j to index in all_ijv
        all_ijv_map = Indexes([count])
        # Bincount to get the # of ID pixels per i, j
        id_count = np.bincount(all_ijv_map.rev_idx,
                               all_ijv[:, -1]).astype(int)
        gt_count = count - id_count
        #
        # Now we can create an indexer that has N x M elements per i, j,
        # where N is the number of GT pixels at that i, j and M is
        # the number of ID pixels. We can then use the indexer to pull
        # out the label values for each to populate a sparse array.
        #
        cross_map = Indexes([id_count, gt_count])
        off_gt = all_ijv_map.fwd_idx[cross_map.rev_idx] + cross_map.idx[0]
        off_id = (all_ijv_map.fwd_idx[cross_map.rev_idx] +
                  cross_map.idx[1] + id_count[cross_map.rev_idx])
        intersect_matrix = coo_matrix(
            (np.ones(len(off_gt)),
             (all_ijv[off_id, 2], all_ijv[off_gt, 2])),
            shape=(ID_obj + 1, GT_obj + 1)).toarray()[1:, 1:]

    gt_areas = objects_GT.areas
    id_areas = objects_ID.areas
    FN_area = gt_areas[np.newaxis, :] - intersect_matrix
    all_intersecting_area = np.sum(intersect_matrix)

    dom_ID = []

    for i in range(0, ID_obj):
        indices_jj = np.nonzero(lID == i)[0]
        id_i = iID[indices_jj]
        id_j = jID[indices_jj]
        ID_pixels[id_i, id_j] = 1

    # loop through the rows of the intersection matrix, one per ID object
    for i in intersect_matrix:
        if len(i) == 0 or max(i) == 0:
            id = -1  # we missed the object; arbitrarily assign -1 index
        else:
            # index of the GT object with the most overlapping pixels
            id = np.where(i == max(i))[0][0]
        # for each ID object, which GT object dominates?
        dom_ID += [id]

    dom_ID = np.array(dom_ID)

    for i in range(0, len(intersect_matrix.T)):
        if len(np.where(dom_ID == i)[0]) > 1:
            final_id = np.where(
                intersect_matrix.T[i] == max(intersect_matrix.T[i]))
            final_id = final_id[0][0]
            all_id = np.where(dom_ID == i)[0]
            nonfinal = [x for x in all_id if x != final_id]
            # the other objects cannot be candidates for this match now
            for n in nonfinal:
                intersect_matrix.T[i][n] = 0

    TP = 0
    FN = 0
    FP = 0
    for i in range(0, len(dom_ID)):
        d = dom_ID[i]
        if d == -1:
            tp = 0
            fn = id_areas[i]
            fp = 0
        else:
            fp = (np.sum(intersect_matrix[i][0:d]) +
                  np.sum(intersect_matrix[i][(d + 1)::]))
            tp = intersect_matrix[i][d]
            fn = FN_area[i][d]
        TP += tp
        FN += fn
        FP += fp

    TN = max(0, total_pixels - TP - FN - FP)

    def nan_divide(numerator, denominator):
        if denominator == 0:
            return np.nan
        return float(numerator) / float(denominator)

    accuracy = nan_divide(TP, all_intersecting_area)
    recall = nan_divide(TP, GT_tot_area)
    precision = nan_divide(TP, (TP + FP))
    F_factor = nan_divide(2 * (precision * recall), (precision + recall))
    true_positive_rate = nan_divide(TP, (FN + TP))
    false_positive_rate = nan_divide(FP, (FP + TN))
    false_negative_rate = nan_divide(FN, (FN + TP))
    true_negative_rate = nan_divide(TN, (FP + TN))

    shape = np.maximum(
        np.maximum(np.array(objects_GT.shape),
                   np.array(objects_ID.shape)),
        np.ones(2, int))
    rand_index, adjusted_rand_index = self.compute_rand_index_ijv(
        objects_GT.ijv, objects_ID.ijv, shape)

    m = workspace.measurements
    m.add_image_measurement(self.measurement_name(FTR_F_FACTOR),
                            F_factor)
    m.add_image_measurement(self.measurement_name(FTR_PRECISION),
                            precision)
    m.add_image_measurement(self.measurement_name(FTR_RECALL), recall)
    m.add_image_measurement(self.measurement_name(FTR_TRUE_POS_RATE),
                            true_positive_rate)
    m.add_image_measurement(self.measurement_name(FTR_FALSE_POS_RATE),
                            false_positive_rate)
    m.add_image_measurement(self.measurement_name(FTR_TRUE_NEG_RATE),
                            true_negative_rate)
    m.add_image_measurement(self.measurement_name(FTR_FALSE_NEG_RATE),
                            false_negative_rate)
    m.add_image_measurement(self.measurement_name(FTR_RAND_INDEX),
                            rand_index)
    m.add_image_measurement(
        self.measurement_name(FTR_ADJUSTED_RAND_INDEX),
        adjusted_rand_index)

    def subscripts(condition1, condition2):
        x1, y1 = np.where(GT_pixels == condition1)
        x2, y2 = np.where(ID_pixels == condition2)
        mask = set(zip(x1, y1)) & set(zip(x2, y2))
        return list(mask)

    TP_mask = subscripts(1, 1)
    FN_mask = subscripts(1, 0)
    FP_mask = subscripts(0, 1)
    TN_mask = subscripts(0, 0)

    TP_pixels = np.zeros((xGT, yGT))
    FN_pixels = np.zeros((xGT, yGT))
    FP_pixels = np.zeros((xGT, yGT))
    TN_pixels = np.zeros((xGT, yGT))

    def maskimg(mask, img):
        for ea in mask:
            img[ea] = 1
        return img

    TP_pixels = maskimg(TP_mask, TP_pixels)
    FN_pixels = maskimg(FN_mask, FN_pixels)
    FP_pixels = maskimg(FP_mask, FP_pixels)
    TN_pixels = maskimg(TN_mask, TN_pixels)

    if self.wants_emd:
        emd = self.compute_emd(objects_ID, objects_GT)
        m.add_image_measurement(
            self.measurement_name(FTR_EARTH_MOVERS_DISTANCE), emd)

    if self.show_window:
        workspace.display_data.true_positives = TP_pixels
        workspace.display_data.true_negatives = TN_pixels
        workspace.display_data.false_positives = FP_pixels
        workspace.display_data.false_negatives = FN_pixels
        workspace.display_data.statistics = [
            (FTR_F_FACTOR, F_factor),
            (FTR_PRECISION, precision),
            (FTR_RECALL, recall),
            (FTR_FALSE_POS_RATE, false_positive_rate),
            (FTR_FALSE_NEG_RATE, false_negative_rate),
            (FTR_RAND_INDEX, rand_index),
            (FTR_ADJUSTED_RAND_INDEX, adjusted_rand_index)
        ]
        if self.wants_emd:
            workspace.display_data.statistics.append(
                (FTR_EARTH_MOVERS_DISTANCE, emd))
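
#
# Illustrative sketch, not part of this module: the pixel-level confusion
# counts that measure_objects accumulates, shown for plain binary masks
# with the per-object matching step left out. The helper name
# _binary_confusion is hypothetical.
#
def _binary_confusion(gt_mask, id_mask):
    '''Return (TP, FP, FN, TN) pixel counts for two boolean masks'''
    gt = np.asarray(gt_mask, bool)
    test = np.asarray(id_mask, bool)
    tp = np.sum(gt & test)    # foreground in both
    fp = np.sum(~gt & test)   # foreground only in the test labeling
    fn = np.sum(gt & ~test)   # foreground only in the ground truth
    tn = np.sum(~gt & ~test)  # background in both
    return tp, fp, fn, tn
# From these counts, precision = TP / (TP + FP), recall = TP / (TP + FN),
# and the F-factor is their harmonic mean, 2 * p * r / (p + r) - the same
# quantities nan_divide computes above, guarded against zero denominators.
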
def __convert_sparse_to_dense(self):
    from cellprofiler.utilities.hdf5_dict import HDF5ObjectSet
    sparse = self.get_sparse()
    if len(sparse) == 0:
        return self.__set_or_cache_dense(
            np.zeros([1] + list(self.shape), np.uint16))

    #
    # The code below assigns a "color" to each label so that no two
    # overlapping labels share a color
    #
    positional_columns = []
    available_columns = []
    lexsort_columns = []
    for axis in HDF5ObjectSet.AXES:
        if axis in sparse.dtype.fields.keys():
            positional_columns.append(sparse[axis])
            available_columns.append(sparse[axis])
            lexsort_columns.insert(0, sparse[axis])
        else:
            positional_columns.append(0)
    labels = sparse[HDF5ObjectSet.AXIS_LABELS]
    lexsort_columns.insert(0, labels)

    sort_order = np.lexsort(lexsort_columns)
    n_labels = np.max(labels)
    #
    # Find the first entry of each run of identical coordinates
    #
    mask = (available_columns[0][sort_order[:-1]] !=
            available_columns[0][sort_order[1:]])
    for column in available_columns[1:]:
        mask = mask | (column[sort_order[:-1]] != column[sort_order[1:]])
    breaks = np.hstack(([0], np.where(mask)[0] + 1, [len(labels)]))
    firsts = breaks[:-1]
    counts = breaks[1:] - firsts
    indexer = Indexes(counts)
    #
    # Eliminate the locations that are singly labeled
    #
    mask = counts > 1
    firsts = firsts[mask]
    counts = counts[mask]
    if len(counts) == 0:
        dense = np.zeros([1] + list(self.shape), labels.dtype)
        dense[tuple([0] + positional_columns)] = labels
        return self.__set_or_cache_dense(dense)
    #
    # There are n * (n - 1) ordered pairs for each coordinate
    # (n = # of labels): n = 1 -> 0 pairs, n = 2 -> 2 pairs,
    # n = 3 -> 6 pairs
    #
    pairs = all_pairs(np.max(counts))
    pair_counts = counts * (counts - 1)
    #
    # Create an indexer for the inputs (indexes) and for the outputs
    # (first and second of the pairs).
    #
    # Remember idx points into sort_order, which points into labels,
    # to get the nth label, grouped into consecutive positions.
    #
    input_indexer = Indexes(counts)
    output_indexer = Indexes(pair_counts)
    #
    # The start of the run of overlaps and the offsets
    #
    run_starts = firsts[output_indexer.rev_idx]
    offs = pairs[output_indexer.idx[0], :]
    first = labels[sort_order[run_starts + offs[:, 0]]]
    second = labels[sort_order[run_starts + offs[:, 1]]]
    #
    # And sort these so that we get consecutive lists for each
    #
    pair_sort_order = np.lexsort((second, first))
    first = first[pair_sort_order]
    second = second[pair_sort_order]
    #
    # Eliminate dupes and self-pairs
    #
    to_keep = np.hstack(
        ([True],
         (first[1:] != first[:-1]) | (second[1:] != second[:-1])))
    to_keep = to_keep & (first != second)
    first = first[to_keep]
    second = second[to_keep]
    #
    # Bincount each label so we can find the ones that have the
    # most overlap. See cpmorphology.color_labels and
    # Welsh, "An upper bound for the chromatic number of a graph and
    # its application to timetabling problems", The Computer Journal,
    # 10(1) p 85 (1967)
    #
    overlap_counts = np.bincount(first.astype(np.int32))
    #
    # The index to the i'th label's stuff
    #
    indexes = np.cumsum(overlap_counts) - overlap_counts
    #
    # A vector of the current color per label. All non-overlapping
    # objects are assigned to plane 1
    #
    v_color = np.ones(n_labels + 1, int)
    v_color[0] = 0
    #
    # Clear all overlapping objects
    #
    v_color[np.unique(first)] = 0
    #
    # Process the overlapping labels in order of increasing overlap
    # count
    #
    ol_labels = np.where(overlap_counts > 0)[0]
    processing_order = np.lexsort((ol_labels, overlap_counts[ol_labels]))

    for index in ol_labels[processing_order]:
        neighbors = second[
            indexes[index]:indexes[index] + overlap_counts[index]]
        colors = np.unique(v_color[neighbors])
        if colors[0] == 0:
            if len(colors) == 1:
                # all neighbors unassigned - put self in color 1
                v_color[index] = 1
                continue
            else:
                # otherwise, ignore the unassigned group and continue
                colors = colors[1:]
        #
        # Match a range against the colors array - the first place
        # they don't match is the first color we can use
        #
        crange = np.arange(1, len(colors) + 1)
        misses = crange[colors != crange]
        if len(misses):
            color = misses[0]
        else:
            color = len(colors) + 1
        v_color[index] = color
    #
    # Create the dense matrix by using the color to address the
    # 5-d hyperplane into which we place each label
    #
    dense = np.zeros([np.max(v_color)] + list(self.shape), labels.dtype)
    slices = tuple([v_color[labels] - 1] + positional_columns)
    dense[slices] = labels
    indices = [
        np.where(v_color == i)[0] for i in range(1, dense.shape[0] + 1)]
    return self.__set_or_cache_dense(dense, indices)
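
#
# Illustrative sketch, not part of this module: the end-to-end effect of
# the sparse-to-dense conversion on a tiny 2-d example. An IJV-style array
# of (i, j, label) rows with overlaps becomes a stack of label planes with
# no overlap inside any one plane. The helper name _ijv_to_planes and its
# first-fit placement are assumptions for illustration; the method above
# instead colors the full overlap graph before writing the planes.
#
def _ijv_to_planes(ijv, shape):
    '''Split (i, j, label) rows into a stack of non-overlapping planes

    ijv - N x 3 integer array of (i, j, label) rows
    shape - (height, width) tuple for each output plane
    '''
    planes = []
    for label in np.unique(ijv[:, 2]):
        coords = ijv[ijv[:, 2] == label, :2]
        # first-fit: place the label in the first plane where all of its
        # pixels are still unoccupied
        for plane in planes:
            if not np.any(plane[coords[:, 0], coords[:, 1]]):
                break
        else:
            plane = np.zeros(shape, np.int32)
            planes.append(plane)
        plane[coords[:, 0], coords[:, 1]] = label
    return np.array(planes) if planes else np.zeros((1,) + shape, np.int32)
# e.g. for ijv = np.array([[0, 0, 1], [0, 0, 2], [0, 1, 2]]) and shape
# (1, 2), labels 1 and 2 overlap at (0, 0), so the result has two planes.
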