Example #1
class BatchScorer:

    def __init__(self, **kwargs):
        print('BatchScorer, __init__()')

        model_path = kwargs.get('model_path')
        self.detector = TFDetector(model_path)

        self.job_id = kwargs.get('job_id')

        self.input_container_sas = kwargs.get('input_container_sas')
        self.output_dir = kwargs.get('output_dir')

        self.detection_threshold = kwargs.get('detection_threshold')
        self.batch_size = kwargs.get('batch_size')

        self.image_ids_to_score = kwargs.get('image_ids')  # list of image IDs to download and score
        self.images = []

        self.detections = []
        self.image_ids = []  # all the IDs of the images that PIL successfully opened
        self.failed_images = []  # list of image_ids that failed to open

    @staticmethod
    def get_account_from_uri(sas_uri):
        url_parts = parse.urlsplit(sas_uri)
        loc = url_parts.netloc
        return loc.split('.')[0]

    @staticmethod
    def get_sas_key_from_uri(sas_uri):
        """Get the query part of the SAS token that contains permissions, access times and
        signature.

        Args:
            sas_uri: Azure blob storage SAS token

        Returns: Query part of the SAS token.
        """
        url_parts = parse.urlsplit(sas_uri)
        return url_parts.query

    @staticmethod
    def get_container_from_uri(sas_uri):
        url_parts = parse.urlsplit(sas_uri)

        raw_path = url_parts.path[1:]
        container = raw_path.split('/')[0]

        return container
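    # For illustration (hypothetical URI): given
    #   sas_uri = 'https://myaccount.blob.core.windows.net/mycontainer?sv=2019-02-02&sig=abc'
    # the helpers above return 'myaccount' (account), 'mycontainer' (container) and
    # 'sv=2019-02-02&sig=abc' (the SAS query string), respectively.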

    def download_images(self):
        print('BatchScorer, download_images()')

        blob_service = BlockBlobService(
            account_name=BatchScorer.get_account_from_uri(self.input_container_sas),
            sas_token=BatchScorer.get_sas_key_from_uri(self.input_container_sas))
        container_name = BatchScorer.get_container_from_uri(self.input_container_sas)

        for image_id in self.image_ids_to_score:
            try:
                stream = io.BytesIO()
                _ = blob_service.get_blob_to_stream(container_name, image_id, stream)
                image = TFDetector.open_image(stream)
                image = TFDetector.resize_image(image)  # image loaded here
                self.images.append(image)
                self.image_ids.append(image_id)
            except Exception as e:
                print('score.py, failed to download or open image {}, exception: {}'.format(image_id, str(e)))
                self.failed_images.append(image_id)
                continue

    def score(self):
        print('BatchScorer, score()')
        # self.image_ids does not include any failed images; self.image_ids is overwritten here
        self.detections, self.image_ids, failed_images_during_detection = \
            self.detector.generate_detections_batch(
                self.images, self.image_ids, self.batch_size, self.detection_threshold)

        self.failed_images.extend(failed_images_during_detection)

    def write_output(self):
        """Outputs csv where each row is image_path, True if there is detection,
           "[ [y1, x1, y2, x2, confidence], [...] ]"
        """
        print('BatchScorer, write_output()')
        detections_path = os.path.join(self.output_dir, 'detections_{}.csv'.format(self.job_id))

        with open(detections_path, 'w', newline='') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(['image_path', 'max_confidence', 'detections'])
            for image_id, detections in zip(self.image_ids, self.detections):
                # has_detections = len(detections) > 0
                max_conf = 0.0  # detections should be sorted from high to low confidence, but compute the max to be safe
                for detection in detections:
                    conf = detection[4]
                    if conf > max_conf:
                        max_conf = conf
                writer.writerow([image_id, max_conf, str(detections)])

        failed_path = os.path.join(self.output_dir, 'failures_{}.csv'.format(self.job_id))
        with open(failed_path, 'w', newline='') as f:
            writer = csv.writer(f)
            for i in self.failed_images:
                writer.writerow([i])
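A minimal driver for this class might look like the sketch below. The keyword names match the constructor above; the model path, job id, SAS URI and image IDs are hypothetical placeholders.

scorer = BatchScorer(
    model_path='/models/detector_frozen_graph.pb',  # hypothetical model path
    job_id='job_001',  # hypothetical job id
    input_container_sas='https://account.blob.core.windows.net/container?sv=...&sig=...',  # placeholder SAS URI
    output_dir='/outputs',
    detection_threshold=0.05,
    batch_size=8,
    image_ids=['folder/0001.jpg', 'folder/0002.jpg'])  # blob names inside the container
scorer.download_images()
scorer.score()
scorer.write_output()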
Example #2
    classification_names = config_file["classes"].split(",")
    inference_graph_path = str(
        Path(config_file["inference_output_dir"]) /
        "frozen_inference_graph.pb")
    supported_file_type = config_file["filetype"]

    #TODO: Make sure $PYTHONPATH has this in it --> /opt/caffe/python:/opt/caffe2/build:

    #TODO: make sure tagged.csv exists
    cur_tagged = config_file["tagged_output"]

    # These are the "tagging in progress" labels. Meaning they will have null labels and class names
    # This file needs to exist even if it's empty
    cur_tagging = config_file["tagging_output"]  # new config key being added for training V2

    logging.info("\n****Initializing TF Detector...****")
    cur_detector = TFDetector(classification_names, inference_graph_path)
    logging.info("\n****Initializing TF Detector DONE****")

    logging.info("\n****Creating Suggestions****")
    get_suggestions(cur_detector,
                    image_dir,
                    untagged_output,
                    tagged_output,
                    cur_tagged,
                    cur_tagging,
                    filetype=supported_file_type,
                    min_confidence=float(config_file["min_confidence"]),
                    user_folders=config_file["user_folders"] == "True")
    logging.info("\n****Creating Suggestions DONE****")
Example #3
class BatchScorer:
    """
    Coordinates scoring a batch of images using model at model_path.
    The images are first downloaded and all loaded before scoring is performed.
    """
    def __init__(self, **kwargs):
        print('BatchScorer, __init__()')

        model_path = kwargs.get('model_path')
        self.detector = TFDetector(model_path)

        self.job_id = kwargs.get('job_id')

        self.input_container_sas = kwargs.get('input_container_sas')
        self.output_dir = kwargs.get('output_dir')

        self.detection_threshold = kwargs.get('detection_threshold')

        self.image_ids_to_score = kwargs.get('image_ids_to_score')
        self.use_url = kwargs.get('use_url')
        self.images = []

        # determine if there is metadata attached to each image_id
        self.metadata_available = isinstance(self.image_ids_to_score[0], list)
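        # For illustration (hypothetical values), image_ids_to_score is either a flat list such as
        #   ['images/0001.jpg', 'images/0002.jpg']
        # or, when metadata is attached, a list of [image_id, metadata] pairs such as
        #   [['images/0001.jpg', {'location': 'site_a'}], ['images/0002.jpg', {'location': 'site_b'}]]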

        self.detections = []
        self.image_ids = []  # all the IDs of the images that PIL successfully opened
        self.image_metas = []  # if metadata came with the list of image_ids, keep them here
        self.failed_images = []  # list of image_ids that failed to open or be processed
        self.failed_metas = []  # their corresponding metadata

    def download_images(self):

        print(
            'BatchScorer, download_images(), use_url is {}, metadata_available is {}'
            .format(self.use_url, self.metadata_available))

        if not self.use_url:
            blob_service = SasBlob.get_service_from_uri(self.input_container_sas)
            container_name = SasBlob.get_container_from_uri(self.input_container_sas)
            print('blob_service created')

        for i in self.image_ids_to_score:
            if self.metadata_available:
                image_id = i[0]
                image_meta = i[1]
            else:
                image_id = i
                image_meta = None

            try:
                if self.use_url:
                    # im_to_open will be a tempfile with a generated name
                    im_to_open, headers = request.urlretrieve(
                        image_id)  # TODO do not save to disk
                else:
                    im_to_open = io.BytesIO()
                    _ = blob_service.get_blob_to_stream(
                        container_name, image_id, im_to_open)

                # open is lazy; load() loads the image so we know it can be read successfully
                image = TFDetector.open_image(im_to_open)
                image.load()

                self.images.append(image)
                self.image_ids.append(image_id)
                self.image_metas.append(image_meta)
            except Exception as e:
                print(
                    'score.py, failed to download or open image {}: {}'.format(
                        image_id, str(e)))
                self.failed_images.append(image_id)
                self.failed_metas.append(image_meta)
                continue

    def score(self):
        print('BatchScorer, score()')
        # self.image_ids already excludes images that failed to download or open
        self.detections, failed_images, failed_metas = self.detector.generate_detections_batch(
            images=self.images,
            image_ids=self.image_ids,
            detection_threshold=self.detection_threshold,
            image_metas=self.image_metas,
            metadata_available=self.metadata_available)

        self.failed_images.extend(failed_images)
        self.failed_metas.extend(failed_metas)

    def write_output(self):
        """Uploads a json containing all the detections (a subset of the "images" field of the final
        output), as well as a json list of failed images.
        """
        print('BatchScorer, write_output()')

        detections_path = os.path.join(
            self.output_dir, 'detections_{}.json'.format(self.job_id))

        with open(detections_path, 'w') as f:
            json.dump(self.detections, f, indent=1)

        failed_path = os.path.join(self.output_dir,
                                   'failures_{}.json'.format(self.job_id))

        if self.metadata_available:
            failed_items = [[image_id, meta]
                            for image_id, meta in zip(self.failed_images, self.failed_metas)]
        else:
            failed_items = self.failed_images

        with open(failed_path, 'w') as f:
            json.dump(failed_items, f, indent=1)
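A minimal driver for this variant might look like the sketch below, here scoring images by URL with per-image metadata attached; all paths, ids and URLs are hypothetical placeholders.

scorer = BatchScorer(
    model_path='/models/detector_frozen_graph.pb',  # hypothetical model path
    job_id='job_002',  # hypothetical job id
    input_container_sas=None,  # not used when use_url is True
    output_dir='/outputs',
    detection_threshold=0.05,
    use_url=True,
    image_ids_to_score=[  # [image_id, metadata] pairs; metadata_available is inferred from the first item
        ['https://example.com/images/0001.jpg', {'location': 'site_a'}],
        ['https://example.com/images/0002.jpg', {'location': 'site_b'}],
    ])
scorer.download_images()
scorer.score()
scorer.write_output()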