def update_detection_table(RepeatDetectionResults, options, outputFilename=None):
    """
    Mark every suspicious detection by negating its confidence, then refresh
    the per-image maximum confidence to reflect those changes.

    Args:
        RepeatDetectionResults: object exposing [detectionResults] (a table
            accessed here via .iloc, .at and .iterrows()),
            [suspiciousDetections], [filenameToRow] and [otherFields].
        options: object from which [iouThreshold] and [confidenceMin] are read.
        outputFilename: when non-empty, the updated table is handed to
            write_api_results() along with [otherFields].

    Returns:
        The detectionResults table; it is modified in place.
    """

    detectionResults = RepeatDetectionResults.detectionResults

    # An array of length nDirs, where each element is a list of DetectionLocation
    # objects for that directory that have been flagged as suspicious
    suspiciousDetectionsByDirectory = RepeatDetectionResults.suspiciousDetections

    nBboxChanges = 0

    print('Updating output table')

    # For each directory
    for directoryEvents in suspiciousDetectionsByDirectory:

        # For each suspicious detection group in this directory
        for detectionEvent in directoryEvents:

            locationBbox = detectionEvent.bbox

            # For each instance of this suspicious detection
            for instance in detectionEvent.instances:

                instanceBbox = instance.bbox

                # This should match the bbox for the detection event
                iou = ct_utils.get_iou(instanceBbox, locationBbox)

                # There are instances where iou is very close to the threshold,
                # so we cannot use a strict ">" here
                assert iou >= options.iouThreshold

                assert instance.filename in RepeatDetectionResults.filenameToRow
                iRow = RepeatDetectionResults.filenameToRow[instance.filename]
                row = detectionResults.iloc[iRow]
                rowDetections = row['detections']
                detectionToModify = rowDetections[instance.iDetection]

                # Make sure the bounding box matches
                #
                # NOTE(review): only the first three bbox elements are compared;
                # confirm whether leaving out the fourth is deliberate.
                assert (instanceBbox[0:3] == detectionToModify['bbox'][0:3])

                # Make the probability negative, if it hasn't been switched by
                # another bounding box
                if detectionToModify['conf'] >= 0:
                    detectionToModify['conf'] = -1 * detectionToModify['conf']
                    nBboxChanges += 1

            # ...for each instance

        # ...for each detection

    # ...for each directory

    # Update maximum probabilities

    nProbChanges = 0
    nProbChangesToNegative = 0
    nProbChangesAcrossThreshold = 0

    # For each row...
    for iRow, row in detectionResults.iterrows():

        detections = row['detections']

        # Rows without a detection list (None, or a float NaN) have nothing to
        # update; calling len() on them would raise a TypeError.
        if detections is None or isinstance(detections, float):
            continue

        if len(detections) == 0:
            continue

        maxPOriginal = float(row['max_detection_conf'])
        assert maxPOriginal >= 0

        maxP = None
        nNegative = 0

        for detection in detections:
            p = detection['conf']
            if p < 0:
                nNegative += 1
            if (maxP is None) or (p > maxP):
                maxP = p

        if abs(maxP - maxPOriginal) > 0.00000001:

            # We should only be making detections *less* likely
            assert maxP < maxPOriginal

            detectionResults.at[iRow, 'max_detection_conf'] = maxP

            nProbChanges += 1

            if maxP < 0:
                nProbChangesToNegative += 1

            if maxPOriginal >= options.confidenceMin and maxP < options.confidenceMin:
                nProbChangesAcrossThreshold += 1

            # Negative probabilities should be the only reason maxP changed, so
            # we should have found at least one negative value
            assert nNegative > 0

        # ...if there was a change to the max probability for this row

    # ...for each row

    # Only write output when a non-empty filename was supplied
    if outputFilename:
        write_api_results(detectionResults, RepeatDetectionResults.otherFields,
                          outputFilename)

    print(
        'Finished updating detection table\nChanged {} detections that impacted {} maxPs ({} to negative) ({} across confidence threshold)'
        .format(nBboxChanges, nProbChanges, nProbChangesToNegative,
                nProbChangesAcrossThreshold))

    return detectionResults
def find_matches_in_directory(dirName, options, rowsByDirectory):
    """
    Group the detections of one directory into candidate detection locations.

    Every qualifying detection is compared (by IOU) against the candidates
    found so far; it is appended to each candidate it matches, or becomes a
    new candidate if it matches none.

    Args:
        dirName: key into [rowsByDirectory]; also passed to DetectionLocation.
        options: object from which [pbar], [confidenceMin], [confidenceMax],
            [excludeClasses], [maxSuspiciousDetectionSize] and [iouThreshold]
            are read.
        rowsByDirectory: mapping from directory name to a table iterated with
            .iterrows(), whose rows provide 'file', 'max_detection_conf' and
            'detections'.

    Returns:
        A list of DetectionLocation objects for this directory.
    """

    if options.pbar is not None:
        options.pbar.update()

    # List of DetectionLocations
    candidateDetections = []

    rows = rowsByDirectory[dirName]

    # For each image in this directory
    for iDirectoryRow, row in rows.iterrows():

        filename = row['file']
        if not ct_utils.is_image_file(filename):
            continue

        # Don't bother checking images with no detections above threshold
        maxP = float(row['max_detection_conf'])
        if maxP < options.confidenceMin:
            continue

        # Array of dict, where each element is
        # {
        #     'category': '1',  # str value, category ID
        #     'conf': 0.926,    # confidence of this detection
        #     'bbox': [x_min, y_min, width_of_box, height_of_box]
        #             # (x_min, y_min) is upper-left,
        #             # all in relative coordinates and length
        # }
        detections = row['detections']

        # A row with no detection list (None, or a float NaN) cannot be
        # searched; skip it rather than failing on len() below.
        if detections is None or isinstance(detections, float):
            print('Skipping image {} with no detection list'.format(filename))
            continue

        assert len(detections) > 0

        # For each detection in this image
        for iDetection, detection in enumerate(detections):

            assert 'category' in detection and 'conf' in detection and 'bbox' in detection

            confidence = detection['conf']
            assert confidence >= 0.0 and confidence <= 1.0

            if confidence < options.confidenceMin:
                continue
            if confidence > options.confidenceMax:
                continue

            # Optionally exclude some classes from consideration as suspicious
            if len(options.excludeClasses) > 0:
                iClass = int(detection['category'])
                if iClass in options.excludeClasses:
                    continue

            bbox = detection['bbox']

            # Is this detection too big to be suspicious?
            w, h = bbox[2], bbox[3]
            area = h * w

            # These are relative coordinates
            assert area >= 0.0 and area <= 1.0

            if area > options.maxSuspiciousDetectionSize:
                continue

            instance = IndexedDetection(iDetection, row['file'], bbox)

            bFoundSimilarDetection = False

            # For each detection in our candidate list
            for candidate in candidateDetections:

                # Is this a match?
                iou = ct_utils.get_iou(bbox, candidate.bbox)

                if iou >= options.iouThreshold:

                    bFoundSimilarDetection = True

                    # If so, add this example to the list for this detection
                    candidate.instances.append(instance)

                    # We *don't* break here; we allow this instance to possibly
                    # match multiple candidates.  There isn't an obvious right or
                    # wrong here.

            # ...for each detection on our candidate list

            # If we found no matches, add this to the candidate list
            if not bFoundSimilarDetection:
                candidate = DetectionLocation(instance, detection, dirName)
                candidateDetections.append(candidate)

        # ...for each detection

    # ...for each row

    return candidateDetections
def find_matches_in_directory(dirName, options, rowsByDirectory):
    """
    Cluster the detections found in [dirName] into DetectionLocation
    candidates.

    Each detection that passes the confidence, class and size filters is
    compared by IOU with every candidate collected so far.  It is added as an
    instance of every candidate it overlaps sufficiently; if it overlaps none,
    it starts a new candidate.

    Args:
        dirName: key into [rowsByDirectory]; also passed to DetectionLocation.
        options: object from which [pbar], [confidenceMin], [confidenceMax],
            [excludeClasses], [maxSuspiciousDetectionSize] and [iouThreshold]
            are read.
        rowsByDirectory: mapping from directory name to a table iterated with
            .iterrows(); rows provide 'file', 'max_detection_conf',
            'detections' and (for failed images) 'failure'.

    Returns:
        A list of DetectionLocation objects.
    """

    progress_bar = options.pbar
    if progress_bar is not None:
        progress_bar.update()

    # Every distinct location seen so far in this directory
    locations = []

    for _, image_row in rowsByDirectory[dirName].iterrows():

        image_file = image_row['file']
        if not ct_utils.is_image_file(image_file):
            continue

        # Nothing above threshold in this image, so nothing to examine
        if float(image_row['max_detection_conf']) < options.confidenceMin:
            continue

        # Normally a list of dicts shaped like:
        #
        # {
        #     'category': '1',  # str value, category ID
        #     'conf': 0.926,    # confidence of this detection
        #     'bbox': [x_min, y_min, width_of_box, height_of_box]
        # }
        #
        # ...where (x_min, y_min) is the upper-left corner and everything is in
        # relative coordinates.
        image_detections = image_row['detections']

        # A float instead of a list goes along with a 'failure' string
        if isinstance(image_detections, float):
            assert isinstance(image_row['failure'], str)
            print('Skipping failed image {} ({})'.format(
                image_file, image_row['failure']))
            continue

        assert len(image_detections) > 0

        for detection_index, det in enumerate(image_detections):

            assert 'category' in det and 'conf' in det and 'bbox' in det

            # Confidences may already be negative when this process is run in
            # stages, hence the [-1, 1] range rather than [0, 1].
            score = det['conf']
            assert -1.0 <= score <= 1.0

            if score < options.confidenceMin or score > options.confidenceMax:
                continue

            # Classes on the exclusion list are never considered suspicious
            if len(options.excludeClasses) > 0 and \
                    int(det['category']) in options.excludeClasses:
                continue

            box = det['bbox']
            box_w, box_h = box[2], box[3]

            # Zero-size boxes are silently ignored
            if box_w == 0 or box_h == 0:
                continue

            # Relative coordinates, so the area has to land in [0, 1]
            box_area = box_h * box_w
            assert 0.0 <= box_area <= 1.0, 'Illegal bounding box area {}'.format(
                box_area)

            # Too large to be treated as suspicious
            if box_area > options.maxSuspiciousDetectionSize:
                continue

            det_category = det['category']

            this_instance = IndexedDetection(iDetection=detection_index,
                                             filename=image_row['file'],
                                             bbox=box,
                                             confidence=score,
                                             category=det_category)

            # Deliberately no early exit below: one instance is allowed to
            # attach to several candidates.
            n_matches = 0

            for existing in locations:

                try:
                    overlap = ct_utils.get_iou(box, existing.bbox)
                except Exception as e:
                    other = existing.bbox
                    print(
                        'Warning: IOU computation error on boxes ({},{},{},{}),({},{},{},{}): {}'
                        .format(box[0], box[1], box[2], box[3],
                                other[0], other[1], other[2], other[3],
                                str(e)))
                    continue

                if overlap >= options.iouThreshold:
                    existing.instances.append(this_instance)
                    n_matches += 1

            # No existing candidate matched: this detection starts a new one
            if n_matches == 0:
                locations.append(DetectionLocation(this_instance, det, dirName))

        # ...for each detection in this image

    # ...for each image in this directory

    return locations
def update_detection_table(RepeatDetectionResults, options, outputFilename=None):
    """
    Negate the confidence of every suspicious detection and bring each row's
    'max_detection_conf' in line with the modified detections.

    Args:
        RepeatDetectionResults: object exposing [detectionResults] (a table
            accessed here via .iloc, .at and .iterrows()),
            [suspiciousDetections], [filenameToRow] and [otherFields].
        options: object from which [iouThreshold] and [confidenceMin] are read.
        outputFilename: when not None and non-empty, the updated table is
            passed to write_api_results() along with [otherFields].

    Returns:
        The detectionResults table; it is modified in place.
    """

    table = RepeatDetectionResults.detectionResults

    # One entry per directory, each a list of the DetectionLocation objects
    # flagged as suspicious in that directory
    flagged_by_directory = RepeatDetectionResults.suspiciousDetections

    n_flipped = 0

    print('Updating output table')

    ## Pass 1: negate the confidence of each flagged detection

    for directory_events in flagged_by_directory:

        for event in directory_events:

            event_box = event.bbox

            for inst in event.instances:

                inst_box = inst.bbox

                # An instance's box should be nearly the same as its group's
                # box, "nearly" being defined by the IOU threshold.
                iou = ct_utils.get_iou(inst_box, event_box)
                assert iou >= options.iouThreshold

                assert inst.filename in RepeatDetectionResults.filenameToRow
                row_position = RepeatDetectionResults.filenameToRow[inst.filename]
                target = table.iloc[row_position]['detections'][inst.iDetection]

                # The stored detection has to be the one this instance refers to
                #
                # NOTE(review): only the first three bbox elements are compared;
                # confirm whether leaving out the fourth is deliberate.
                assert (inst_box[0:3] == target['bbox'][0:3])

                # Another group may already have negated this detection
                if target['conf'] >= 0:
                    target['conf'] = -1 * target['conf']
                    n_flipped += 1

    ## Pass 2: recompute the maximum confidence for every row

    n_max_changed = 0
    n_max_now_negative = 0
    n_max_below_threshold = 0

    for row_label, image_row in table.iterrows():

        image_detections = image_row['detections']

        # A float instead of a list goes along with a 'failure' string
        if isinstance(image_detections, float):
            assert isinstance(image_row['failure'], str)
            continue

        if len(image_detections) == 0:
            continue

        # The previous maximum may itself be negative when the input is the
        # output of an earlier run.
        previous_max = float(image_row['max_detection_conf'])
        assert previous_max >= -1.0

        scores = [d['conf'] for d in image_detections]
        current_max = max(scores)
        n_negative = sum(1 for s in scores if s < 0)

        if abs(current_max - previous_max) > 1e-3:

            # Confidence is only ever supposed to go down
            assert current_max < previous_max

            table.at[row_label, 'max_detection_conf'] = current_max
            n_max_changed += 1

            if (current_max < 0) and (previous_max >= 0):
                n_max_now_negative += 1

            if (previous_max >= options.confidenceMin) and \
                    (current_max < options.confidenceMin):
                n_max_below_threshold += 1

            # A changed maximum can only be explained by negated detections
            assert n_negative > 0

    ## Optionally write the updated table

    if outputFilename is not None and len(outputFilename) > 0:
        write_api_results(table, RepeatDetectionResults.otherFields,
                          outputFilename)

    print(
        'Finished updating detection table\nChanged {} detections that impacted {} maxPs ({} to negative) ({} across confidence threshold)'
        .format(n_flipped, n_max_changed, n_max_now_negative,
                n_max_below_threshold))

    return table
def find_matches_in_directory(dirName, options, rowsByDirectory):
    """
    Collect the distinct detection locations that occur in [dirName].

    Candidates are kept in a pyqtree index, so each new detection is only
    compared (by IOU, and only within its own category) against candidates
    whose rectangles intersect it.  A detection is added as an instance of
    every candidate it matches; if it matches none, it becomes a new candidate.

    Args:
        dirName: key into [rowsByDirectory]; also stored on new candidates.
        options: object from which [pbar], [maxImagesPerFolder],
            [includeFolders], [excludeFolders], [confidenceMin],
            [confidenceMax], [excludeClasses], [maxSuspiciousDetectionSize]
            and [iouThreshold] are read.
        rowsByDirectory: mapping from directory name to a table iterated with
            .iterrows(); rows provide 'file', 'max_detection_conf',
            'detections' and (for failed images) 'failure'.

    Returns:
        A list of DetectionLocation objects sorted by their id, or an empty
        list when the directory is skipped.
    """

    if options.pbar is not None:
        options.pbar.update()

    # Spatial index holding the DetectionLocation candidates found so far
    location_index = pyqtree.Index(bbox=(-0.1, -0.1, 1.1, 1.1))

    # One row per image in this folder
    folder_rows = rowsByDirectory[dirName]

    ## Decide whether this folder should be processed at all

    image_limit = options.maxImagesPerFolder
    if image_limit is not None and len(folder_rows) > image_limit:
        print(
            'Ignoring directory {} because it has {} images (limit set to {})'.
            format(dirName, len(folder_rows), image_limit))
        return []

    if options.includeFolders is not None:
        assert options.excludeFolders is None, 'Cannot specify include and exclude folder lists'
        if dirName not in options.includeFolders:
            print('Ignoring folder {}, not in inclusion list'.format(dirName))
            return []

    if options.excludeFolders is not None:
        assert options.includeFolders is None, 'Cannot specify include and exclude folder lists'
        if dirName in options.excludeFolders:
            print('Ignoring folder {}, on exclusion list'.format(dirName))
            return []

    ## Walk every image in the folder

    # Debugging counter only; nothing reads it
    n_boxes_evaluated = 0

    # [row_label] is a pandas index and may not start from zero, so
    # [row_number] is tracked separately as a zero-based loop counter.
    for row_number, (row_label, image_row) in enumerate(folder_rows.iterrows()):

        image_file = image_row['file']
        if not ct_utils.is_image_file(image_file):
            continue

        has_required_fields = ('max_detection_conf' in image_row and
                               'detections' in image_row and
                               image_row['detections'] is not None)
        if not has_required_fields:
            print('Skipping row {}'.format(row_label))
            continue

        # Nothing above threshold in this image, so nothing to examine
        if float(image_row['max_detection_conf']) < options.confidenceMin:
            continue

        # Normally a list of dicts shaped like:
        #
        # {
        #     'category': '1',  # str value, category ID
        #     'conf': 0.926,    # confidence of this detection
        #
        #     # (x_min, y_min) is upper-left, all in relative coordinates
        #     'bbox': [x_min, y_min, width_of_box, height_of_box]
        # }
        image_detections = image_row['detections']

        # A float instead of a list goes along with a 'failure' string
        if isinstance(image_detections, float):
            assert isinstance(image_row['failure'], str)
            print('Skipping failed image {} ({})'.format(
                image_file, image_row['failure']))
            continue

        assert len(image_detections) > 0

        for detection_index, det in enumerate(image_detections):

            n_boxes_evaluated += 1

            if det is None:
                print('Skipping detection {}'.format(detection_index))
                continue

            assert 'category' in det and 'conf' in det and 'bbox' in det

            # Confidences may already be negative when this process is run in
            # stages, hence the [-1, 1] range rather than [0, 1].
            score = det['conf']
            assert -1.0 <= score <= 1.0

            if score < options.confidenceMin or score > options.confidenceMax:
                continue

            # Classes on the exclusion list are never considered suspicious
            if len(options.excludeClasses) > 0 and \
                    int(det['category']) in options.excludeClasses:
                continue

            box = det['bbox']
            box_w, box_h = box[2], box[3]

            # Zero-size boxes are silently ignored
            if box_w == 0 or box_h == 0:
                continue

            # Relative coordinates, so the area has to land in [0, 1]
            box_area = box_h * box_w
            assert 0.0 <= box_area <= 1.0, 'Illegal bounding box area {}'.format(
                box_area)

            # Too large to be treated as suspicious
            if box_area > options.maxSuspiciousDetectionSize:
                continue

            det_category = det['category']

            this_instance = IndexedDetection(iDetection=detection_index,
                                             filename=image_row['file'],
                                             bbox=box,
                                             confidence=score,
                                             category=det_category)

            index_rect = detection_rect_to_rtree_rect(box)

            # The index is queried by rectangle only, so this includes
            # candidates of every category
            nearby = location_index.intersect(index_rect)
            nearby.sort(key=lambda x: x.id)

            # Deliberately no early exit below: one instance is allowed to
            # attach to several candidates.
            n_matches = 0

            for existing in nearby:

                # Matching is restricted to candidates of the same category
                if existing.category != det_category:
                    continue

                try:
                    overlap = ct_utils.get_iou(box, existing.bbox)
                except Exception as e:
                    other = existing.bbox
                    print(
                        'Warning: IOU computation error on boxes ({},{},{},{}),({},{},{},{}): {}'
                        .format(box[0], box[1], box[2], box[3],
                                other[0], other[1], other[2], other[3],
                                str(e)))
                    continue

                if overlap >= options.iouThreshold:
                    existing.instances.append(this_instance)
                    n_matches += 1

            # No existing candidate matched: this detection starts a new one
            if n_matches == 0:
                new_location = DetectionLocation(instance=this_instance,
                                                 detection=det,
                                                 relativeDir=dirName,
                                                 category=det_category,
                                                 id=row_number)
                location_index.insert(item=new_location, bbox=index_rect)

        # ...for each detection in this image

    # ...for each image in this folder

    # Pull every candidate back out of the index using a query rectangle far
    # larger than the index bounds
    all_locations = location_index.intersect([-100, -100, 100, 100])

    # For debugging only, it's convenient to have these sorted as if they had
    # never gone into a tree structure.  In practice this is typically a sort
    # by filename.
    all_locations.sort(key=lambda x: x.id)

    return all_locations