import csv
import io
import os
from urllib import parse

from azure.storage.blob import BlockBlobService  # azure-storage-blob 2.x API

# TFDetector is a project-local wrapper around the TensorFlow detection graph;
# the module path below is an assumption and may need to match your repo layout.
from tf_detector import TFDetector


class BatchScorer:

    def __init__(self, **kwargs):
        print('BatchScorer, __init__()')

        model_path = kwargs.get('model_path')
        self.detector = TFDetector(model_path)

        self.job_id = kwargs.get('job_id')
        self.input_container_sas = kwargs.get('input_container_sas')
        self.output_dir = kwargs.get('output_dir')
        self.detection_threshold = kwargs.get('detection_threshold')
        self.batch_size = kwargs.get('batch_size')
        self.image_ids_to_score = kwargs.get('image_ids')  # IDs of the images to download and score

        self.images = []
        self.detections = []
        self.image_ids = []  # all the IDs of the images that PIL successfully opened
        self.failed_images = []  # list of image_ids that failed to open

    @staticmethod
    def get_account_from_uri(sas_uri):
        url_parts = parse.urlsplit(sas_uri)
        loc = url_parts.netloc
        return loc.split('.')[0]

    @staticmethod
    def get_sas_key_from_uri(sas_uri):
        """Get the query part of the SAS token, which contains the permissions, access times
        and signature.

        Args:
            sas_uri: Azure blob storage SAS URI

        Returns: Query part of the SAS token.
        """
        url_parts = parse.urlsplit(sas_uri)
        return url_parts.query

    @staticmethod
    def get_container_from_uri(sas_uri):
        url_parts = parse.urlsplit(sas_uri)
        raw_path = url_parts.path[1:]
        container = raw_path.split('/')[0]
        return container

    def download_images(self):
        print('BatchScorer, download_images()')

        blob_service = BlockBlobService(
            account_name=BatchScorer.get_account_from_uri(self.input_container_sas),
            sas_token=BatchScorer.get_sas_key_from_uri(self.input_container_sas))
        container_name = BatchScorer.get_container_from_uri(self.input_container_sas)

        for image_id in self.image_ids_to_score:
            try:
                stream = io.BytesIO()
                _ = blob_service.get_blob_to_stream(container_name, image_id, stream)

                image = TFDetector.open_image(stream)
                image = TFDetector.resize_image(image)  # image loaded here

                self.images.append(image)
                self.image_ids.append(image_id)
            except Exception as e:
                print('score.py, failed to download or open image {}, exception: {}'.format(
                    image_id, str(e)))
                self.failed_images.append(image_id)
                continue

    def score(self):
        print('BatchScorer, score()')

        # self.image_ids does not include any failed images; self.image_ids is overwritten here
        self.detections, self.image_ids, failed_images_during_detection = \
            self.detector.generate_detections_batch(
                self.images, self.image_ids, self.batch_size, self.detection_threshold)

        self.failed_images.extend(failed_images_during_detection)

    def write_output(self):
        """Outputs a csv where each row is
        image_path, max_confidence, "[[y1, x1, y2, x2, confidence], [...]]"
        """
        print('BatchScorer, write_output()')

        detections_path = os.path.join(self.output_dir, 'detections_{}.csv'.format(self.job_id))
        with open(detections_path, 'w', newline='') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(['image_path', 'max_confidence', 'detections'])

            for image_id, detections in zip(self.image_ids, self.detections):
                # detections should be sorted from high to low confidence, but compute the max to be sure
                max_conf = 0.0
                for detection in detections:
                    conf = detection[4]
                    if conf > max_conf:
                        max_conf = conf
                writer.writerow([image_id, max_conf, str(detections)])

        failed_path = os.path.join(self.output_dir, 'failures_{}.csv'.format(self.job_id))
        with open(failed_path, 'w', newline='') as f:
            writer = csv.writer(f)
            for i in self.failed_images:
                writer.writerow([i])
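# --------------------------------------------------------------------------
# Hedged usage sketch, not part of the original script: shows how the class
# above is intended to be driven. All argument values below are placeholders;
# the real driver supplies them.
if __name__ == '__main__':
    scorer = BatchScorer(
        model_path='/models/frozen_inference_graph.pb',  # placeholder model path
        job_id='job_001',  # placeholder job id
        input_container_sas='https://myaccount.blob.core.windows.net/images?sv=placeholder',  # placeholder SAS URI
        output_dir='/output',
        detection_threshold=0.05,
        batch_size=8,
        image_ids=['camera1/img_0001.jpg', 'camera1/img_0002.jpg'])  # placeholder blob names

    scorer.download_images()  # download blobs and open them with PIL
    scorer.score()            # run the detector over the successfully opened images
    scorer.write_output()     # write detections_<job_id>.csv and failures_<job_id>.csv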
classification_names = config_file["classes"].split(",")
inference_graph_path = str(
    Path(config_file["inference_output_dir"]) / "frozen_inference_graph.pb")
supported_file_type = config_file["filetype"]

# TODO: Make sure $PYTHONPATH has this in it --> /opt/caffe/python:/opt/caffe2/build:
# TODO: make sure tagged.csv exists
cur_tagged = config_file["tagged_output"]

# These are the "tagging in progress" labels, meaning they will have null labels and class names.
# This file needs to exist even if it's empty.
cur_tagging = config_file["tagging_output"]  # This is a new config key we are adding for training V2

logging.info("\n****Initializing TF Detector...****")
cur_detector = TFDetector(classification_names, inference_graph_path)
logging.info("\n****Initializing TF Detector DONE****")

logging.info("\n****Creating Suggestions****")
get_suggestions(cur_detector,
                image_dir,
                untagged_output,
                tagged_output,
                cur_tagged,
                cur_tagging,
                filetype=supported_file_type,
                min_confidence=float(config_file["min_confidence"]),
                user_folders=config_file["user_folders"] == "True")
logging.info("\n****Creating Suggestions DONE****")
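# --------------------------------------------------------------------------
# Hedged sketch, not part of the original script: the shape of the config
# mapping the fragment above reads from. Key names are taken from the lookups
# above; every value is a placeholder. image_dir, untagged_output,
# tagged_output and get_suggestions are defined elsewhere in the enclosing script.
config_file = {
    "classes": "defect,scratch,background",     # comma-separated class names (placeholder)
    "inference_output_dir": "/data/inference",  # directory holding frozen_inference_graph.pb
    "filetype": "*.jpg",                        # pattern for the images to score
    "tagged_output": "/data/tagged.csv",        # images with confirmed labels
    "tagging_output": "/data/tagging.csv",      # "tagging in progress" images
    "min_confidence": "0.5",                    # parsed with float() above
    "user_folders": "True",                     # compared against "True" above
}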
class BatchScorer: """ Coordinates scoring a batch of images using model at model_path. The images are first downloaded and all loaded before scoring is performed. """ def __init__(self, **kwargs): print('BatchScorer, __init__()') model_path = kwargs.get('model_path') self.detector = TFDetector(model_path) self.job_id = kwargs.get('job_id') self.input_container_sas = kwargs.get('input_container_sas') self.output_dir = kwargs.get('output_dir') self.detection_threshold = kwargs.get('detection_threshold') self.image_ids_to_score = kwargs.get('image_ids_to_score') self.use_url = kwargs.get('use_url') self.images = [] # determine if there is metadata attached to each image_id self.metadata_available = True if isinstance( self.image_ids_to_score[0], list) else False self.detections = [] self.image_ids = [ ] # all the IDs of the images that PIL successfully opened self.image_metas = [ ] # if metadata came with the list of image_ids, keep them here self.failed_images = [ ] # list of image_ids that failed to open or be processed self.failed_metas = [] # their corresponding metadata def download_images(self): print( 'BatchScorer, download_images(), use_url is {}, metadata_available is {}' .format(self.use_url, self.metadata_available)) if not self.use_url: print('blob_service created') blob_service = SasBlob.get_service_from_uri( self.input_container_sas) container_name = SasBlob.get_container_from_uri( self.input_container_sas) for i in self.image_ids_to_score: if self.metadata_available: image_id = i[0] image_meta = i[1] else: image_id = i image_meta = None try: if self.use_url: # im_to_open will be a tempfile with a generated name im_to_open, headers = request.urlretrieve( image_id) # TODO do not save to disk else: im_to_open = io.BytesIO() _ = blob_service.get_blob_to_stream( container_name, image_id, im_to_open) # open is lazy; load() loads the image so we know it can be read successfully image = TFDetector.open_image(im_to_open) image.load() self.images.append(image) self.image_ids.append(image_id) self.image_metas.append(image_meta) except Exception as e: print( 'score.py, failed to download or open image {}: {}'.format( image_id, str(e))) self.failed_images.append(image_id) self.failed_metas.append(image_meta) continue def score(self): print('BatchScorer, score()') # self.image_ids does not include any failed images; self.image_ids is overwritten here self.detections, failed_images, failed_metas = self.detector.generate_detections_batch( images=self.images, image_ids=self.image_ids, detection_threshold=self.detection_threshold, image_metas=self.image_metas, metadata_available=self.metadata_available) self.failed_images.extend(failed_images) self.failed_metas.extend(failed_metas) def write_output(self): """Uploads a json containing all the detections (a subset of the "images" field of the final output), as well as a json list of failed images. """ print('BatchScorer, write_output()') detections_path = os.path.join( self.output_dir, 'detections_{}.json'.format(self.job_id)) with open(detections_path, 'w') as f: json.dump(self.detections, f, indent=1) failed_path = os.path.join(self.output_dir, 'failures_{}.json'.format(self.job_id)) if self.metadata_available: failed_items = [[ image_id, meta ] for (image_id, meta) in zip(self.failed_images, self.failed_metas)] else: failed_items = self.failed_images with open(failed_path, 'w') as f: json.dump(failed_items, f, indent=1)