    def fit(self,
            *,
            timeout: float = None,
            iterations: int = None) -> CallResult[None]:
        """
        Creates the image generators and then trains a RetinaNet model on the image paths in the input
        dataframe column.

        A validation generator can optionally be used.
        """

        # Create object that stores backbone information
        self.backbone = models.backbone(self.hyperparams["backbone"])

        # Create the generators
        train_generator = CSVGenerator(
            self.annotations,
            self.classes,
            self.base_dir,
            self.hyperparams["batch_size"],
            self.backbone.preprocess_image,
            shuffle_groups=False,
        )
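        # In the reference keras-retinanet CSVGenerator, `annotations` is a CSV
        # of `image_path,x1,y1,x2,y2,class_name` rows and `classes` is a CSV of
        # `class_name,id` pairs; this fork appears to pass batch_size and
        # preprocess_image positionally on top of that interface.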

        ## Create model
        logger.info("Creating model...")

        model, training_model, prediction_model = self._create_models(
            backbone_retinanet=self.backbone.retinanet,
            num_classes=train_generator.num_classes(),
            lr=self.hyperparams["learning_rate"],
        )
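        # In keras-retinanet's create_models, `model` is the base training
        # graph, `training_model` is the one actually fit (a multi-GPU wrapper
        # when enabled), and `prediction_model` wraps the base model with box
        # decoding for inference; _create_models presumably follows that
        # convention here.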

        ## Set up callbacks
        callbacks = self._create_callbacks(
            model,
            training_model,
            prediction_model,
        )

        start_time = time.time()
        logger.info("Starting training...")

        training_model.fit_generator(
            generator=train_generator,
            steps_per_epoch=self.hyperparams["n_steps"],
            epochs=self.hyperparams["n_epochs"],
            verbose=1,
            callbacks=callbacks,
        )

        training_model.save_weights(
            os.path.join(self.hyperparams["weights_path"], "model_weights.h5"))

        logger.info(
            f"Training complete. Training took {time.time()-start_time} seconds."
        )
        return CallResult(None)
    # Example 2: a fit() variant that loads pretrained backbone weights,
    # optionally freezes the backbone, and trains with multiprocessing.
    def fit(self,
            *,
            timeout: float = None,
            iterations: int = None) -> CallResult[None]:
        """
        Creates the image generators and then trains a RetinaNet model on the image paths in the input
        dataframe column.

        A validation generator can optionally be used.

        If no weights file is provided, the default is to use the ImageNet weights.
        """

        # Create object that stores backbone information
        self.backbone = models.backbone(self.hyperparams['backbone'])

        # Create the generators
        train_generator = CSVGenerator(self.annotations, self.classes,
                                       self.base_dir,
                                       self.hyperparams['batch_size'],
                                       self.backbone.preprocess_image)

        # Running the model
        ## Assign weights
        if self.hyperparams['weights'] is False:
            weights = None
        else:
            weights = self.volumes[self.hyperparams['backbone']]
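        # `self.volumes` is d3m's static-file mechanism: the runtime mounts
        # files declared in the primitive's installation metadata and passes
        # their local paths in a dict keyed by volume name; each backbone name
        # presumably maps to its pretrained ImageNet weights file here.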

        ## Create model
        print('Creating model...', file=sys.__stdout__)

        model, self.training_model, prediction_model = self._create_models(
            backbone_retinanet=self.backbone.retinanet,
            num_classes=train_generator.num_classes(),
            weights=weights,
            freeze_backbone=self.hyperparams['freeze_backbone'],
            lr=self.hyperparams['learning_rate'])

        model.summary()

        ### !!! vgg AND densenet BACKBONES CURRENTLY NOT IMPLEMENTED !!!
        ## Let the generator compute the backbone layer shapes using the actual backbone model
        # if 'vgg' in self.hyperparams['backbone'] or 'densenet' in self.hyperparams['backbone']:
        #     train_generator.compute_shapes = make_shapes_callback(model)
        #     if validation_generator:
        #         validation_generator.compute_shapes = train_generator.compute_shapes

        ## Set up callbacks
        callbacks = self._create_callbacks(
            model,
            self.training_model,
            prediction_model,
        )

        start_time = time.time()
        print('Starting training...', file=sys.__stdout__)
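        # NOTE: with a plain Python generator, Keras documents that
        # use_multiprocessing=True can duplicate data across workers; a
        # keras.utils.Sequence is the recommended input for multiprocess
        # loading.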

        self.training_model.fit_generator(
            generator=train_generator,
            steps_per_epoch=self.hyperparams['n_steps'],
            epochs=self.hyperparams['n_epochs'],
            verbose=1,
            callbacks=callbacks,
            workers=self.workers,
            use_multiprocessing=self.multiprocessing,
            max_queue_size=self.max_queue_size)

        print(
            f'Training complete. Training took {time.time()-start_time} seconds.',
            file=sys.__stdout__)
        return CallResult(None)
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """
        Produce image detection predictions.

        Parameters
        ----------
            inputs  : D3M dataframe of size (n_images, dimension) containing the d3m index, image name,
                      and bounding box for each image.

        Returns
        -------
            outputs : A d3m dataframe container with the d3m index, bounding boxes as a string
                      (8-coordinate format), and confidence scores.
        """
        iou_threshold = 0.5  # Bounding box overlap threshold separating false positives from true positives
        score_threshold = 0.05  # Confidence score threshold to use for detections
        max_detections = 100  # Maximum number of detections to keep per image
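        # iou_threshold and max_detections are not applied explicitly in this
        # method; non-max suppression happens inside the converted inference
        # model (keras-retinanet's FilterDetections layer, whose reference
        # defaults are nms_threshold=0.5, score_threshold=0.05,
        # max_detections=300).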

        # Create object that stores backbone information
        backbone = models.backbone(self.hyperparams["backbone"])

        # Create the generators
        train_generator = CSVGenerator(
            self.annotations,
            self.classes,
            self.base_dir,
            self.hyperparams["batch_size"],
            backbone.preprocess_image,
            shuffle_groups=False,
        )

        # Instantiate model
        model, training_model, prediction_model = self._create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=train_generator.num_classes(),
            lr=self.hyperparams["learning_rate"],
        )

        # Load model weights saved in fit
        training_model.load_weights(
            os.path.join(self.hyperparams["weights_path"], "model_weights.h5"))

        # Convert training model to inference model
        inference_model = models.convert_model(training_model)
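        # keras-retinanet's convert_model wraps the training graph with box
        # decoding and non-max suppression, so the inference model returns
        # (boxes, scores, labels) already filtered and sorted by score.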

        # Generate image paths
        image_cols = inputs.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/FileName")
        base_dirs = [
            inputs.metadata.query(
                (metadata_base.ALL_ELEMENTS,
                 col))["location_base_uris"][0].replace("file:///", "/")
            for col in image_cols
        ]
        # Use local names rather than rebinding self.base_dir inside the
        # comprehension, which clobbered the attribute set during training
        self.image_paths = np.array([[
            os.path.join(base_dir, filename)
            for filename in inputs.iloc[:, col]
        ] for base_dir, col in zip(base_dirs, image_cols)]).flatten()
        self.image_paths = pd.Series(self.image_paths)

        # Initialize output objects
        box_list = []
        score_list = []
        image_name_list = []

        # Deduplicate image paths, preserving first-occurrence order, then
        # predict bounding boxes and confidence scores for each image
        image_list = list(dict.fromkeys(self.image_paths.tolist()))

        start_time = time.time()
        logger.info("Starting testing...")

        for i in image_list:
            image = read_image_bgr(i)

            # preprocess image for network
            image = preprocess_image(image)
            image, scale = resize_image(image)

            boxes, scores, labels = inference_model.predict_on_batch(
                tf.constant(np.expand_dims(image, axis=0), dtype=tf.float32))

            # correct for image scale
            boxes /= scale

            for box, score in zip(boxes[0], scores[0]):
                # Detections come back sorted by score, so stop at the first
                # one below the confidence threshold
                if score < score_threshold:
                    break

                b = box.astype(int)
                box_list.append(b)
                score_list.append(score)
                image_name_list.append(i)  # one image path per predicted box

        logger.info(
            f"Testing complete. Testing took {time.time()-start_time} seconds."
        )

        ## Convert predicted boxes from a list of arrays to a list of strings
        boxes = np.array(box_list).tolist()
        # Convert [x1, y1, x2, y2] boxes to the 8-coordinate polygon format D3M expects
        boxes = [[b[0], b[1], b[0], b[3], b[2], b[3], b[2], b[1]] for b in boxes]
        boxes = [",".join(map(str, b)) for b in boxes]
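        # Example: a box [10, 20, 30, 40] ([x1, y1, x2, y2]) becomes the
        # polygon string "10,20,10,40,30,40,30,20" (top-left, bottom-left,
        # bottom-right, top-right).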

        # Create mapping between image names and D3M index
        input_df = pd.DataFrame({
            "d3mIndex": inputs.d3mIndex,
            "image": [os.path.basename(path) for path in self.image_paths],
        })

        d3mIdx_image_mapping = input_df.set_index("image").T.to_dict("list")

        # Extract values for image name keys and get missing image predictions (if they exist)
        image_name_list = [os.path.basename(name) for name in image_name_list]
        d3mIdx = [d3mIdx_image_mapping.get(key) for key in image_name_list]
        empty_predictions_image_names = [
            k for k, v in d3mIdx_image_mapping.items() if v not in d3mIdx
        ]
        d3mIdx = [item for sublist in d3mIdx
                  for item in sublist]  # Flatten list of lists

        ## Assemble in a Pandas DataFrame
        results = pd.DataFrame({
            "d3mIndex": d3mIdx,
            "bounding_box": boxes,
            "confidence": score_list
        })

        # D3M metrics evaluator needs at least one prediction per image. If RetinaNet does not return
        # predictions for an image, create a dummy empty prediction row to add to results_df for that
        # missing image.
        if len(empty_predictions_image_names) != 0:
            # Create data frame of empty predictions for missing each image and concat with results.
            # Sort results_df.
            empty_predictions_df = self._fill_empty_predictions(
                empty_predictions_image_names, d3mIdx_image_mapping)
            results_df = pd.concat([results, empty_predictions_df
                                    ]).sort_values("d3mIndex")
        else:
            results_df = results

        # Convert to DataFrame container
        results_df = d3m_DataFrame(results_df)

        ## Assemble first output column ('d3mIndex)
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
        col_dict["structural_type"] = str
        col_dict["name"] = "d3mIndex"
        col_dict["semantic_types"] = (
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/PrimaryKey",
        )
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 0), col_dict)

        ## Assemble second output column ('bounding_box')
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
        col_dict["structural_type"] = str
        col_dict["name"] = "bounding_box"
        col_dict["semantic_types"] = (
            "http://schema.org/Text",
            "https://metadata.datadrivendiscovery.org/types/PredictedTarget",
            "https://metadata.datadrivendiscovery.org/types/BoundingPolygon",
        )
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 1), col_dict)

        ## Assemble third output column ('confidence')
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 2)))
        col_dict["structural_type"] = str
        col_dict["name"] = "confidence"
        col_dict["semantic_types"] = (
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/Score",
        )
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 2), col_dict)

        return CallResult(results_df)
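
# Hypothetical usage sketch (illustrative names, not from the source):
# assumes this class is a d3m primitive named RetinaNetPrimitive following
# the standard set_training_data / fit / produce lifecycle.
#
#     primitive = RetinaNetPrimitive(hyperparams=hyperparams, volumes=volumes)
#     primitive.set_training_data(inputs=train_inputs, outputs=train_outputs)
#     primitive.fit()
#     predictions = primitive.produce(inputs=test_inputs).value  # d3m dataframe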