def main(req: func.HttpRequest) -> func.HttpResponse:
    """Train the configured Custom Vision project and publish the new iteration.

    Reads ``projectID``, ``trainingKey``, ``clientEndpoint`` and
    ``predictionID`` from the environment, force-trains the project, polls the
    iteration once per second until its status is "Completed", then publishes
    it under a timestamped name.

    Args:
        req: The triggering HTTP request (its content is not read).

    Returns:
        A 200 response naming the published iteration on success, or a 400
        response carrying the error message when training or publishing
        raised.

    Raises:
        KeyError: If one of the required environment variables is missing.
    """
    logging.info(
        'ML Professoar HTTP trigger function TrainModel processed a request.')

    # Get Cognitive Services Environment Variables
    projectID = os.environ["projectID"]
    trainingKey = os.environ['trainingKey']
    endpoint = os.environ['clientEndpoint']
    publish_iteration_name = "SampleTreeDetection @ " + str(datetime.now())
    prediction_resource_id = os.environ['predictionID']

    trainer = CustomVisionTrainingClient(trainingKey, endpoint=endpoint)

    try:
        iteration = trainer.train_project(projectID, force_train=True)
        # NOTE(review): this loop only ends on "Completed"; if the service can
        # report a terminal failure status it would poll forever - confirm.
        while iteration.status != "Completed":
            iteration = trainer.get_iteration(projectID, iteration.id)
            logging.info("Training status: " + iteration.status)
            time.sleep(1)

        # The iteration is now trained. Publish it to the project endpoint
        trainer.publish_iteration(projectID, iteration.id,
                                  publish_iteration_name,
                                  prediction_resource_id)

    except Exception as e:
        # Report the failure to the caller instead of falling through to the
        # success response below.
        message = str(e)
        logging.error(message)
        return func.HttpResponse(
            "Training failed for ProjectID: " + projectID +
            " could not be trained with message: " + message,
            status_code=400)

    return func.HttpResponse(
        "Training complete for ProjectID: " + projectID +
        " published under iteration name: " + publish_iteration_name,
        status_code=200)
Beispiel #2
0
def main():
    """
    Training for object detection with Azure Custom Vision.

    Steps, in order:
      1. Load the JSON config named by the ``--config`` style argument
         returned from ``parse_args()``.
      2. Create a project in the "General" object-detection domain.
      3. Build tagged image entries from ``annotation.json`` (one key per
         label) via ``add_image`` and upload them as a single batch.
      4. Train, polling once per second until the status is "Completed".
      5. Publish the trained iteration to the configured prediction resource.
    """
    args = parse_args()
    # Use context managers so the file handles are closed deterministically.
    with open(args.config, "r") as config_file:
        config = json.load(config_file)
    credentials = ApiKeyCredentials(in_headers={"Training-key": config["training_key"]})
    trainer = CustomVisionTrainingClient(config["ENDPOINT"], credentials)

    print("Creating project...")

    # Find the object detection domain
    obj_detection_domain = next(
        domain
        for domain in trainer.get_domains()
        if domain.type == "ObjectDetection" and domain.name == "General"
    )
    project = trainer.create_project(
        config["project_name"], domain_id=obj_detection_domain.id
    )

    # ======================================================================================

    print("Adding images...")
    image_folder = config["image_folder"]
    with open("annotation.json", "r") as annotation_file:
        annotations = json.load(annotation_file)
    tagged_images_with_regions = []
    for label, label_annotations in annotations.items():
        tagged_images_with_regions += add_image(
            trainer, label, project.id, label_annotations, image_folder
        )

    upload_result = trainer.create_images_from_files(
        project.id, ImageFileCreateBatch(images=tagged_images_with_regions)
    )
    if not upload_result.is_batch_successful:
        # Report per-image status; training still proceeds with whatever
        # images the service accepted.
        print("Image batch upload failed.")
        for image in upload_result.images:
            print("Image status: ", image.status)

    # ======================================================================================
    print("Training...")
    publish_iteration_name = config["publish_iteration_name"]
    prediction_resource_id = config["prediction_resource_id"]
    iteration = trainer.train_project(project.id)
    # NOTE(review): the loop only exits on "Completed" - confirm how a failed
    # training run is reported by the service.
    while iteration.status != "Completed":
        iteration = trainer.get_iteration(project.id, iteration.id)
        print("Training status: " + iteration.status)
        time.sleep(1)

    # The iteration is now trained. Publish it to the project endpoint
    trainer.publish_iteration(
        project.id, iteration.id, publish_iteration_name, prediction_resource_id
    )
    print("Done!")
Beispiel #3
0
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger: force-train the Custom Vision project and publish it.

    Configuration comes from the ``ProjectID``, ``TrainingKey``,
    ``ClientEndpoint`` and ``ResourceID`` environment variables. A missing or
    empty value yields a 400 response asking for the configuration to be
    fixed. A training/publishing error yields a 400 response with the error
    text. Success yields a 200 response naming the published iteration.
    """
    logging.info(
        'ML Professoar HTTP trigger function TrainModel processed a request.')

    config_help = (
        "Please ensure ProjectID, TrainingKey, ClientEndpoint, and ResourceID environment variables are correctly configured."
    )

    # Get Cognitive Services Environment Variables
    try:
        project_id = os.environ["ProjectID"]
        training_key = os.environ['TrainingKey']
        endpoint = os.environ['ClientEndpoint']
        publish_iteration_name = "SampleTreeDetection @ " + str(datetime.now())
        prediction_resource_id = os.environ['ResourceID']
    except Exception as env_error:
        logging.info(str(env_error))
        return func.HttpResponse(config_help, status_code=400)

    # Guard clause: every setting must be present and non-empty.
    settings = (project_id, training_key, endpoint, publish_iteration_name,
                prediction_resource_id)
    if not all(settings):
        return func.HttpResponse(config_help, status_code=400)

    trainer = CustomVisionTrainingClient(training_key, endpoint=endpoint)

    try:
        current = trainer.train_project(project_id, force_train=True)
        # Poll once per second until the service reports completion.
        while current.status != "Completed":
            current = trainer.get_iteration(project_id, current.id)
            logging.info("Training status: " + current.status)
            time.sleep(1)

        # Training finished; expose the iteration on the prediction resource.
        trainer.publish_iteration(project_id, current.id,
                                  publish_iteration_name,
                                  prediction_resource_id)
    except Exception as train_error:
        message = str(train_error)
        logging.info(message)
        failure_text = ("Training failed for ProjectID: " + project_id +
                        " could not be trained with message: " + message)
        return func.HttpResponse(failure_text, status_code=400)

    success_text = ("Training complete for ProjectID: " + project_id +
                    " published under iteration name: " +
                    publish_iteration_name)
    return func.HttpResponse(success_text, status_code=200)
Beispiel #4
0
def train_project(subscription_key):
    """Create the sample project, upload its images, train and publish it.

    The prediction resource id is read from the environment variable named by
    PREDICTION_RESOURCE_ID_KEY_ENV_NAME. Four tags are created; every image in
    the "Hemlock" folder is tagged Hemlock + Pine Needle Leaves and every
    image in the "Japanese Cherry" folder is tagged Japanese Cherry + Flat
    Leaves. The iteration is polled once per second while its status is
    "Training" and is then published as PUBLISH_ITERATION_NAME.

    Args:
        subscription_key: Training key passed to CustomVisionTrainingClient.

    Returns:
        The project object returned by ``create_project``.

    Raises:
        PredictionResourceMissingError: If the environment variable is unset.
    """
    try:
        resource_id = os.environ[PREDICTION_RESOURCE_ID_KEY_ENV_NAME]
    except KeyError:
        raise PredictionResourceMissingError(
            "Didn't find a prediction resource to publish to. Please set the {} environment variable"
            .format(PREDICTION_RESOURCE_ID_KEY_ENV_NAME))

    trainer = CustomVisionTrainingClient(subscription_key, endpoint=ENDPOINT)

    # Create a new project
    print("Creating project...")
    project = trainer.create_project(
        SAMPLE_PROJECT_NAME, classification_type=Classifier.multiclass)

    # Make the four tags used by the sample images
    hemlock = trainer.create_tag(project.id, "Hemlock")
    cherry = trainer.create_tag(project.id, "Japanese Cherry")
    pine_needle = trainer.create_tag(project.id, "Pine Needle Leaves")
    flat_leaf = trainer.create_tag(project.id, "Flat Leaves")

    print("Adding images...")
    # (sub-folder of IMAGES_FOLDER, tags applied to each image in it)
    upload_plan = (
        ("Hemlock", (hemlock, pine_needle)),
        ("Japanese Cherry", (cherry, flat_leaf)),
    )
    for folder_name, tags in upload_plan:
        folder = os.path.join(IMAGES_FOLDER, folder_name)
        for file_name in os.listdir(folder):
            with open(os.path.join(folder, file_name), mode="rb") as handle:
                trainer.create_images_from_data(
                    project.id, handle.read(), [tag.id for tag in tags])

    print("Training...")
    current = trainer.train_project(project.id)
    while current.status == "Training":
        current = trainer.get_iteration(project.id, current.id)
        print("Training status: " + current.status)
        time.sleep(1)

    # Training has left the "Training" state; name and publish the iteration
    # on the prediction endpoint.
    trainer.publish_iteration(project.id, current.id, PUBLISH_ITERATION_NAME,
                              resource_id)
    print("Done!")

    return project
Beispiel #5
0
    project.id, ImageFileCreateBatch(images=tagged_images_with_regions))
# Abort the script when the image batch was not accepted as a whole, after
# printing the status the service reported for each image.
if not upload_result.is_batch_successful:
    print("Image batch upload failed.")
    for image in upload_result.images:
        print("Image status: ", image.status)
    exit(-1)

# Start training and poll once per second until the iteration is "Completed".
# NOTE(review): no other terminal status is handled here - confirm how a
# failed training run is reported.
print("Training...")
iteration = trainer.train_project(project.id)
while iteration.status != "Completed":
    iteration = trainer.get_iteration(project.id, iteration.id)
    print("Training status: " + iteration.status)
    time.sleep(1)

# The iteration is now trained. Publish it to the project endpoint
trainer.publish_iteration(project.id, iteration.id, publish_iteration_name,
                          prediction_resource_id)
print("Done!")

# Now there is a trained endpoint that can be used to make a prediction

# Open the sample image and get back the prediction results.
# The file is opened in binary mode and the open handle is passed to the
# predictor together with the project id and the published iteration name.
with open(base_image_location + "images/Test/test_od_image.jpg",
          mode="rb") as test_data:
    results = predictor.detect_image(project.id, publish_iteration_name,
                                     test_data)

# Display the results.
for prediction in results.predictions:
    print(
        "\t" + prediction.tag_name +
        ": {0:.2f}% bbox.left = {1:.2f}, bbox.top = {2:.2f}, bbox.width = {3:.2f}, bbox.height = {4:.2f}"
Beispiel #6
0
    def trainModel(self, database, modelID, parameters, onMessage, onFinished):
        """Upload a model's training photos to Custom Vision, then train and publish it.

        The model row (remote project id and training data) is read from
        ``self._datatableName``. Each photo is fetched through
        ``self._serverAPI.getResource``, PNG-encoded and tagged with its class
        (tags are created on demand). Images are uploaded in batches of 64,
        the project is trained (polled every 3 seconds) and the iteration is
        published under the project id as its name.

        Args:
            database: Connection object; ``cursor()`` is used to read the row
                and the object is also handed to ``getResource``.
            modelID: Value matched against the ``id`` column.
            parameters: Not used by this implementation.
            onMessage: Callback receiving a single progress string.
            onFinished: Callback receiving True on success, False otherwise.
        """
        onMessage("Trainer fetching model settings.")
        session = database.cursor()
        try:
            session.execute(
                "SELECT remote_id, training_data, extra_info FROM " +
                self._datatableName + " WHERE id = %s", (modelID, ))
            result = session.fetchone()
        finally:
            # Release the cursor even when the query itself raises.
            session.close()

        if not result:
            onMessage("The trainer can't recognize the given model any more.")
            onFinished(False)
            return

        projectID, trainingData, _ = result

        onMessage("Training starting...")

        onMessage("Retrieving model...")
        trainer = CustomVisionTrainingClient(self._trainingKey,
                                             endpoint=self._endPoint)
        project = trainer.get_project(projectID)

        onMessage("Downloading/Caching and Analyzing training data...")

        batchSize = 64  # images sent per create_images_from_files call
        imageList = []
        dataClassList = {}

        try:
            start = time.time()
            # retrieve information of created tags
            for tag in trainer.get_tags(projectID):
                dataClassList[tag.name] = tag

            imageOK = 0
            imageFailed = 0
            imageTotal = len(trainingData)

            def visualizeImageDownload():
                # "(ok/failed/total)" progress suffix.
                return "(" + str(imageOK) + "/" + str(
                    imageFailed) + "/" + str(imageTotal) + ")"

            for photoID in trainingData:
                image, _, err = self._serverAPI.getResource(
                    database, photoID)

                if err:
                    imageFailed += 1
                    onMessage("Failed to download image " + str(photoID) +
                              ". Error: " + str(err) + " " +
                              visualizeImageDownload())
                    continue

                isOK, encodedImage = cv2.imencode('.png', image)
                if not isOK:
                    # Do not upload a buffer the encoder reported as invalid.
                    imageFailed += 1
                    onMessage("Failed to encode image " + str(photoID) +
                              ". " + visualizeImageDownload())
                    continue

                imageOK += 1
                classOfData = str(trainingData[photoID])

                # create tag if not exists
                if classOfData not in dataClassList:
                    dataClassList[classOfData] = trainer.create_tag(
                        project.id, classOfData)

                imageList.append(
                    ImageFileCreateEntry(
                        name=str(photoID) + ".png",
                        contents=encodedImage,
                        tag_ids=[dataClassList[classOfData].id]))

                onMessage(visualizeImageDownload())
            end = time.time()
            onMessage("Image caching done. Used: " + str(end - start))

            start = time.time()
            for i in range(0, len(imageList), batchSize):
                batch = imageList[i:i + batchSize]
                upload_result = trainer.create_images_from_files(
                    project.id, images=batch)

                if not upload_result.is_batch_successful:
                    onMessage("Image batch upload failed.")

                    for image in upload_result.images:
                        # onMessage is called with one string everywhere else
                        # in this method, so build a single message here too.
                        onMessage("Image status: " + str(image.status))

                    onFinished(False)
                    return
            end = time.time()
            onMessage("Image upload done. Used: " + str(end - start))

            onMessage("Training model with " + str(imageOK) + " photos...")

            iteration = trainer.train_project(project.id)
            # NOTE(review): only "Completed" ends this loop - confirm how a
            # failed training run is reported.
            while iteration.status != "Completed":
                iteration = trainer.get_iteration(project.id, iteration.id)
                onMessage("Training status: " + iteration.status)
                time.sleep(3)

            # The iteration is now trained. Publish it to the project endpoint
            trainer.publish_iteration(project.id, iteration.id, projectID,
                                      self._resourceID)
            onMessage("Training done.")
            onFinished(True)

        except Exception as err:
            onMessage("Failed to train.")
            onMessage("Error Message: " + str(err))
            onFinished(False)
Beispiel #7
0
                                     contents=image_contents.read(),
                                     tag_ids=[tag.id]))


# Create chunks of 64 images
def chunks(l, n):
    """Yield consecutive slices of ``l`` holding at most ``n`` items each."""
    total = len(l)
    for start in range(0, total, n):
        stop = start + n
        yield l[start:stop]


# Lazily split the prepared image entries into groups of at most 64.
batchedImages = chunks(image_list, 64)

# Upload the images in batches of 64 to the Custom Vision Service
# NOTE(review): upload_result is overwritten on every pass and never
# inspected, so a rejected batch goes unnoticed here.
for batchOfImages in batchedImages:
    upload_result = trainer.create_images_from_files(project.id,
                                                     images=batchOfImages)

# Train the model
# Poll once per second until the iteration status is "Completed".
import time
iteration = trainer.train_project(project.id)
while (iteration.status != "Completed"):
    iteration = trainer.get_iteration(project.id, iteration.id)
    print("Training status: " + iteration.status)
    time.sleep(1)

# Publish the iteration of the model
# Both placeholders below must be replaced with real values before running.
publish_iteration_name = '<INSERT ITERATION NAME>'
resource_identifier = '<INSERT RESOURCE IDENTIFIER>'
trainer.publish_iteration(project.id, iteration.id, publish_iteration_name,
                          resource_identifier)
Beispiel #8
0
class Classifier:
    """
        Class for interacting with Custom Vision. Contains these key methods:
            - predict_image() / predict_image_url() / predict_image_by_post(): classify an image
            - upload_images(): reads image URLs from Blob Storage and uploads them to Custom Vision
            - train(): trains and publishes a model iteration
    """
    def __init__(self) -> None:
        """
            Reads configuration values through Keys.get and initializes the
            Azure Custom Vision predictor and trainer clients as well as the
            Blob Storage client.

            Afterwards the name of the most recently created published
            iteration is looked up and stored via models.update_iteration_name.
            If that lookup fails for any reason (including no published
            iteration existing) the name falls back to "iteration1".

            Returns:
            None
        """
        self.ENDPOINT = Keys.get("CV_ENDPOINT")
        self.project_id = Keys.get("CV_PROJECT_ID")
        self.prediction_key = Keys.get("CV_PREDICTION_KEY")
        self.training_key = Keys.get("CV_TRAINING_KEY")
        self.base_img_url = Keys.get("BASE_BLOB_URL")
        self.prediction_resource_id = Keys.get("CV_PREDICTION_RESOURCE_ID")

        self.prediction_credentials = ApiKeyCredentials(
            in_headers={"Prediction-key": self.prediction_key})
        self.predictor = CustomVisionPredictionClient(
            self.ENDPOINT, self.prediction_credentials)
        self.training_credentials = ApiKeyCredentials(
            in_headers={"Training-key": self.training_key})
        self.trainer = CustomVisionTrainingClient(self.ENDPOINT,
                                                  self.training_credentials)
        connect_str = Keys.get("BLOB_CONNECTION_STRING")
        self.blob_service_client = BlobServiceClient.from_connection_string(
            connect_str)
        try:
            # get all project iterations
            iterations = self.trainer.get_iterations(self.project_id)
            # find published iterations
            published_iterations = [
                iteration for iteration in iterations
                if iteration.publish_name is not None
            ]
            # get the latest published iteration
            published_iterations.sort(key=lambda i: i.created)
            self.iteration_name = published_iterations[-1].publish_name

            with api.app.app_context():
                models.update_iteration_name(self.iteration_name)
        except Exception as e:
            # Best effort: fall back to a default name when the lookup fails.
            logging.info(e)
            self.iteration_name = "iteration1"

    @staticmethod
    def _summarize(pairs):
        """
            Turns (label, probability) pairs into the tuple returned by the
            predict_* methods: (dict of label -> probability, label with the
            highest probability). Raises ValueError when there are no pairs.
        """
        pred_kv = dict(pairs)
        best_guess = max(pred_kv, key=pred_kv.get)
        return pred_kv, best_guess

    def predict_image_url(self, img_url: str) -> tuple:
        """
            Predicts label(s) of Image read from URL.

            Parameters:
            img_url: Image URL

            Returns:
            (prediction (dict[str,float]): labels and associated probabilities,
            best_guess: (str): name of the label with highest probability)
        """
        # Always use the iteration name currently stored by the models layer.
        with api.app.app_context():
            self.iteration_name = models.get_iteration_name()
        res = self.predictor.classify_image_url(self.project_id,
                                                self.iteration_name, img_url)
        return self._summarize(
            (i.tag_name, i.probability) for i in res.predictions)

    def predict_image(self, img) -> tuple:
        """
            Predicts label(s) of an image passed as a file-like object.
            ASSUMES:
            -image of type .png
            -image size less than 4MB
            -image resolution at least 256x256 pixels

            Parameters:
            img: file-like object holding the image; it is rewound to
                 position 0 after the request

            Returns:
            (prediction (dict[str,float]): labels and associated probabilities,
            best_guess: (str): name of the label with highest probability)
        """
        with api.app.app_context():
            self.iteration_name = models.get_iteration_name()
        res = self.predictor.classify_image_with_no_store(
            self.project_id, self.iteration_name, img)
        # reset the file head such that it does not affect the state of the file handle
        img.seek(0)
        return self._summarize(
            (i.tag_name, i.probability) for i in res.predictions)

    def predict_image_by_post(self, img) -> tuple:
        """
            Predicts label(s) of an image by POSTing its bytes to the URL
            configured as CV_PREDICTION_ENDPOINT.
            ASSUMES:
            -image of type .png
            -image size less than 4MB
            -image resolution at least 256x256 pixels

            Parameters:
            img: file-like object holding the image; it is rewound to
                 position 0 after the request

            Returns:
            (prediction (dict[str,float]): labels and associated probabilities,
            best_guess: (str): name of the label with highest probability)
        """

        headers = {
            'content-type': 'application/octet-stream',
            "prediction-key": self.prediction_key
        }
        res = requests.post(Keys.get("CV_PREDICTION_ENDPOINT"),
                            img.read(),
                            headers=headers).json()
        img.seek(0)
        return self._summarize(
            (i["tagName"], i["probability"]) for i in res["predictions"])

    def __chunks(self, lst, n):
        """
            Helper method used by upload_images() to split the URL list into
            chunks of at most n entries (Custom Vision limits the batch size).
        """
        for i in range(0, len(lst), n):
            yield lst[i:i + n]

    def upload_images(self, labels: List, container_name) -> None:
        """
            Takes as input a list of labels, uploads all associated images to Azure Custom Vision project.
            If label in input already exists in Custom Vision project, all images are uploaded directly.
            If label in input does not exist in Custom Vision project, new label (Tag object in Custom Vision) is created before uploading images.
            A label whose tag cannot be created is reported and skipped.

            Parameters:
            labels (str[]): List of labels
            container_name: name of the blob container; blobs are listed
                            under the prefix "<label>/"

            Returns:
            None

            Raises:
            TypeError: if a label is not a string
            Exception: whatever get_container_client raised, after it was printed
        """
        url_list = []
        existing_tags = list(self.trainer.get_tags(self.project_id))

        try:
            container = self.blob_service_client.get_container_client(
                container_name)
        except Exception as e:
            print(
                "could not find container with CONTAINER_NAME name error: ",
                str(e),
            )
            # Nothing below can work without a container client; re-raise the
            # real error instead of failing later on an unbound variable.
            raise

        for label in labels:
            # check if input has correct type
            if not isinstance(label, str):
                raise TypeError("label " + str(label) + " must be a string")

            # check if tag already exists (first match wins)
            tag = next((t for t in existing_tags if t.name == label), None)
            if tag is None:
                try:
                    tag = self.trainer.create_tag(self.project_id, label)
                    print("Created new label in project: " + label)
                except Exception as e:
                    print(e)
                    continue

            blob_prefix = f"{label}/"
            blob_list = container.list_blobs(name_starts_with=blob_prefix)

            # NOTE(review): list_blobs presumably returns a lazy pager that is
            # always truthy, so this check may never fire - confirm before
            # relying on it.
            if not blob_list:
                raise AttributeError("no images for this label")

            # build correct URLs and append to URL list
            for blob in blob_list:
                blob_url = f"{self.base_img_url}/{container_name}/{blob.name}"
                url_list.append(
                    ImageUrlCreateEntry(url=blob_url, tag_ids=[tag.id]))

        # upload URLs in chunks of setup.CV_MAX_IMAGES
        print("Uploading images from blob to CV")
        img_f = 0  # failed
        img_s = 0  # successful
        img_d = 0  # duplicates
        itr_img = 0  # images processed so far
        chunks = self.__chunks(url_list, setup.CV_MAX_IMAGES)
        num_imgs = len(url_list)
        error_messages = set()
        for url_chunk in chunks:
            upload_result = self.trainer.create_images_from_urls(
                self.project_id, images=url_chunk)
            if not upload_result.is_batch_successful:
                # Classify every image of a partially failed batch.
                for image in upload_result.images:
                    if image.status == "OK":
                        img_s += 1
                    elif image.status == "OKDuplicate":
                        img_d += 1
                    else:
                        error_messages.add(image.status)
                        img_f += 1

                    itr_img += 1
            else:
                batch_size = len(upload_result.images)
                img_s += batch_size
                itr_img += batch_size

            # num_imgs is non-zero whenever this loop body runs.
            prc = itr_img / num_imgs
            print(
                f"\t succesfull: \033[92m {img_s:5d} \033]92m \033[0m",
                f"\t duplicates: \033[33m {img_d:5d} \033]33m \033[0m",
                f"\t failed: \033[91m {img_f:5d} \033]91m \033[0m",
                f"\t [{prc:03.2%}]",
                sep="",
                end="\r",
                flush=True,
            )

        print()
        if error_messages:
            print("Error messages:")
            for error_message in error_messages:
                print(f"\t {error_message}")

    def get_iteration(self):
        """
            Returns the iteration of the project with the latest `created`
            value. Raises IndexError if the project has no iterations.
        """
        iterations = self.trainer.get_iterations(self.project_id)
        iterations.sort(key=(lambda i: i.created))
        newest_iteration = iterations[-1]
        return newest_iteration

    def delete_iteration(self) -> None:
        """
            Unpublishes and deletes the oldest iteration in Custom Vision when
            the project holds at least setup.CV_MAX_ITERATIONS iterations
            (Custom Vision limits the number of iterations per project).
        """
        iterations = self.trainer.get_iterations(self.project_id)
        if len(iterations) >= setup.CV_MAX_ITERATIONS:
            iterations.sort(key=lambda i: i.created)
            oldest_iteration = iterations[0].id
            self.trainer.unpublish_iteration(self.project_id, oldest_iteration)
            self.trainer.delete_iteration(self.project_id, oldest_iteration)

    def train(self, labels: list) -> None:
        """
            Trains the project, waits for completion (polling once per
            second), publishes the iteration under a freshly generated UUID
            name and stores that name via models.update_iteration_name.
            Frees an iteration slot first through delete_iteration().

            Parameters:
            labels (str[]): List of labels. NOTE: not forwarded to the
                            training call in this implementation.
        """
        try:
            email = Keys.get("EMAIL")
        except Exception:
            print("No email found, setting to empty")
            email = ""

        self.delete_iteration()
        print("Training...")
        iteration = self.trainer.train_project(
            self.project_id,
            reserved_budget_in_hours=1,
            notification_email_address=email,
        )
        # Wait for training to complete
        # NOTE(review): only "Completed" ends this loop - confirm how a failed
        # training run is reported.
        start = time.time()
        while iteration.status != "Completed":
            iteration = self.trainer.get_iteration(self.project_id,
                                                   iteration.id)
            minutes, seconds = divmod(time.time() - start, 60)
            print(
                f"Training status: {iteration.status}",
                f"\t[{minutes:02.0f}m:{seconds:02.0f}s]",
                end="\r",
            )
            time.sleep(1)

        print()

        # The iteration is now trained. Publish it to the project endpoint.
        # The name is converted to str so the service and the models layer
        # receive the same kind of value __init__ stores (a plain string).
        iteration_name = str(uuid.uuid4())
        self.trainer.publish_iteration(
            self.project_id,
            iteration.id,
            iteration_name,
            self.prediction_resource_id,
        )
        with api.app.app_context():
            # NOTE(review): relies on update_iteration_name returning the
            # stored name - confirm against the models module.
            self.iteration_name = models.update_iteration_name(iteration_name)

    def delete_all_images(self) -> None:
        """
            Function for deleting all uploaded images (across all iterations)
            in Custom Vision. Any failure is re-raised as a generic Exception
            with the original error chained as its cause.
        """
        try:
            self.trainer.delete_images(self.project_id,
                                       all_images=True,
                                       all_iterations=True)
        except Exception as e:
            raise Exception("Could not delete all images: " + str(e)) from e

    def _train_or_bad_request(self, labels) -> None:
        """
            Runs train() and converts a CustomVisionErrorException into
            excp.BadRequest("No changes since last training").
        """
        try:
            self.train(labels)
        except CustomVisionErrorException as e:
            msg = "No changes since last training"
            print(e, "exiting...")
            raise excp.BadRequest(msg) from e

    def retrain(self):
        """
            Uploads images for all labels from setup.CONTAINER_NAME_NEW, then
            trains the model and updates the iteration.
        """
        with api.app.app_context():
            labels = models.get_all_labels()

        self.upload_images(labels, setup.CONTAINER_NAME_NEW)
        self._train_or_bad_request(labels)

    def hard_reset_retrain(self):
        """
            Train model on all labels and update iteration, using the images
            in setup.CONTAINER_NAME_ORIGINAL.
            This method sleeps for 60 seconds before uploading, to give
            Custom Vision time to finish deleting old images. It does not
            delete images itself; presumably the caller does that first.
        """
        with api.app.app_context():
            labels = models.get_all_labels()

        # Wait 60 seconds to make sure all images are deleted in custom vision
        time.sleep(60)
        self.upload_images(labels, setup.CONTAINER_NAME_ORIGINAL)
        self._train_or_bad_request(labels)