def extract_contour_dataset(dataset_location='classification/sample_data/imgs_recycling/',
                                  dataset_name='recycling', 
                                  enable_logging=True):
    # if we're not running inside AML WB, set up the share directory
    if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'

    # create a dataset from a directory with folders representing different classes
    dataset = ClassificationDataset.create_from_dir(dataset_name,
                                                    dataset_location,
                                                    enable_logging=enable_logging)

    extract_contour(dataset)
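
# note on the expected on-disk layout (class folder names below are illustrative):
# ClassificationDataset.create_from_dir reads a top-level folder whose sub-folders
# name the classes, e.g.
#
#   classification/sample_data/imgs_recycling/
#       bottle/   img_001.jpg, img_002.jpg, ...
#       can/      img_003.jpg, ...
#       carton/   img_004.jpg, ...
#
# usage sketch:
# extract_contour_dataset(dataset_location='classification/sample_data/imgs_recycling/',
#                         dataset_name='recycling')
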
def classify(dataset_location='classification/sample_data/imgs_recycling/',
             dataset_name='recycling',
             do_augmentations=False):
    """
      a sample pipeline for classification.

      loads a dataset, optionally does some augmentations, creates and trains a
      model using transfer learning based on ResNet18, and returns the accuracy
      on a test set.

      Args:
        dataset_location: path to a dataset.  there should be a top level folder
          containing folders for each class.  see the sample recycling dataset for
          an example of the format
        dataset_name: the name of the dataset.  will be used in the dataset
          management functionality
        do_augmentations: boolean.  specifies whether augmentations should be
          applied to the training set

      Returns:
        the accuracy on the test set
    """

    # if we're not running inside AML WB, set up the share directory
    if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'

    # create a dataset from a directory with folders representing different classes
    dataset = ClassificationDataset.create_from_dir(dataset_name,
                                                    dataset_location)

    # print out some info about the dataset
    dataset.print_info()

    # split the full dataset into a train and test set
    # the stratify option will ensure that the different labels are balanced in the
    # train and test sets
    splitter = Splitter(dataset)
    train_set_orig, test_set = splitter.split(train_size=.8, stratify='label')

    # optionally augment images by cropping and rotating
    if do_augmentations:
        # here we create two pipelines for doing augmentations.  the first
        # will rotate each image by between -45 and 45 degrees (the angle is
        # chosen at random).  then the rotated images will be flipped from left
        # to right with probability .5.  the second pipeline will randomly crop
        # images by between 0 and 10 percent.  each pipeline will be applied to
        # the original dataset.  the resulting dataset will have three times as many
        # images as the original - the original dataset, the dataset after
        # augmentation by the rotate_and_flip pipeline, and the dataset
        # after augmentation by the crop pipeline
        rotate_and_flip = augmenters.Sequential(
            [augmenters.Affine(rotate=(-45, 45)),
             augmenters.Fliplr(.5)])

        crop = augmenters.Sequential([augmenters.Crop(percent=(0, .1))])

        train_set = augment_dataset(train_set_orig, [rotate_and_flip, crop])
    else:
        train_set = train_set_orig

    # now create the model
    base_model_name = 'ResNet18_ImageNet_CNTK'
    model = CNTKTLModel(train_set.labels,
                        base_model_name=base_model_name,
                        output_path='.')

    # train the model using cntk
    model.train(train_set)

    # return the accuracy
    ce = ClassificationEvaluation(model, test_set, minibatch_size=16)
    acc = ce.compute_accuracy()

    return acc
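
# usage sketch for the classify pipeline above; the argument values mirror the
# sample recycling dataset used throughout this file, and the returned accuracy
# depends on the random train/test split, so no particular value is implied.
test_accuracy = classify(dataset_location='classification/sample_data/imgs_recycling/',
                         dataset_name='recycling',
                         do_augmentations=True)
print("Test accuracy = {:2.2f}%".format(100 * test_accuracy))
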
def train_deploy(dataset_location='classification/sample_data/imgs_recycling',
                 dataset_name='recycling', do_augmentations=True,
                 deployment_name="testdeployment", azureml_rscgroup=None, azureml_cluster_name=None):
    """
      a sample pipeline for deploying a model that is trained on a dataset.

      loads a dataset, optionally does some augmentations, creates and trains a
      model using transfer learning based on ResNet18, deploys the trained model
      on the specified Azure ML cluster (or the one configured via the CLI),
      and returns the scoring URL.

      Args:
        dataset_location: path to a dataset.  there should be a top level folder
          containing folders for each class.  see the sample recycling dataset for
          an example of the format
        dataset_name: the name of the dataset.  will be used in the dataset
          management functionality
        do_augmentations: boolean.  specifies whether augmentations should be
          applied to the training set
        deployment_name: the name of the deployment.  will be used in the
          deployment management functionality
        azureml_rscgroup: Azure ML resource group name of the model management
          account.  if not set, the default value configured via the CLI will be used
        azureml_cluster_name: Azure ML cluster name where the model is deployed.
          if not set, the default value configured via the CLI will be used

      Returns:
        the scoring API URL of the deployment
    """
    # if we're not running inside AML WB, set up the share directory
    if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'
    context = Context.get_global_context()

    # create a dataset from a directory with folders representing different classes
    dataset = ClassificationDataset.create_from_dir(dataset_name, dataset_location)

    # print out some info about the dataset
    dataset.print_info()

    # optionally augment images by cropping and rotating
    if do_augmentations:
        # here we create two pipelines for doing augmentations.  the first
        # will rotate each image by between -45 and 45 degrees (the angle is
        # chosen at random).  then the rotated images will be flipped from left
        # to right with probability .5.  the second pipeline will randomly crop
        # images by between 0 and 10 percent.  each pipeline will be applied to
        # the original dataset.  the resulting dataset will have three times as many
        # images as the original - the original dataset, the dataset after
        # augmentation by the rotate_and_flip pipeline, and the dataset
        # after augmentation by the crop pipeline
        rotate_and_flip = augmenters.Sequential([
            augmenters.Affine(rotate=(-45, 45)),
            augmenters.Fliplr(.5)])

        crop = augmenters.Sequential([augmenters.Crop(percent=(0, .1))])

        train_set = augment_dataset(dataset, [rotate_and_flip, crop])
    else:
        train_set = dataset

    # now create the model
    base_model_name = 'ResNet18_ImageNet_CNTK'
    model = CNTKTLModel(train_set.labels,
                        base_model_name=base_model_name,
                        output_path='.')

    # train the model using cntk
    num_epochs = 5
    mb_size = 32
    model.train(train_set,
                lr_per_mb=[.01] * 20 + [.001] * 20 + [.0001],
                num_epochs=num_epochs,
                mb_size=mb_size)

    print("Model state:", model.model_state)

    # if a deployment with this name already exists, delete it first
    AMLDeployment.delete_if_service_exist(deployment_name)

    # deploy the trained model
    deploy_obj = AMLDeployment(
        deployment_name=deployment_name, associated_DNNModel=model,
        aml_env="cluster", replicas=1)
    deploy_obj.deploy()

    return deploy_obj.service_url
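
# usage sketch for the deployment pipeline above; when azureml_rscgroup and
# azureml_cluster_name are left as None, the function falls back to the defaults
# configured through the Azure ML CLI (per the docstring above).
scoring_url = train_deploy(dataset_location='classification/sample_data/imgs_recycling',
                           dataset_name='recycling',
                           deployment_name='testdeployment')
print("Scoring URL:", scoring_url)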

# dataset creation for the fashion image-pairs sample
if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
    os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'
context = Context.get_global_context()

dataset_name = "fashion"
dataset_location = os.path.join(
    Context.get_global_context().storage.outputs_path, "data",
    dataset_name)
print("Dataset Location:", dataset_location)

print("Downloading images to: " + dataset_location)
download_images.download_all(dataset_location)

dataset = ClassificationDataset.create_from_dir(dataset_name,
                                                dataset_location)
print("Dataset consists of {} images with {} labels.".format(
    len(dataset.images), len(dataset.labels)))

# split the data into train and test
splitter = Splitter(dataset)
train_set, test_set = splitter.split(train_size=.5,
                                     random_state=1,
                                     stratify="label")
print("Number of original training images = {}.".format(train_set.size()))

num_train_sets = 20
num_test_sets = 20
num_different_label = 50
trainPairs = ImagePairs(train_set, num_train_sets, num_different_label)
print('There are {} sets of image pairs generated for all labels from training data.'
      .format(num_train_sets))  # format argument assumed
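
# cvtk's ImagePairs above appears to generate sets of image pairs per label,
# with num_different_label presumably controlling how many pairs with a
# different label are drawn.  the helper below is only a rough,
# package-independent sketch of that idea (it is not the cvtk implementation);
# 'images_by_label' is an assumed plain dict mapping each label to its images.
import itertools
import random

def make_image_pairs(images_by_label, num_different_label, seed=1):
    rng = random.Random(seed)
    pairs = []
    for label, images in images_by_label.items():
        # every same-label combination within this class
        for a, b in itertools.combinations(images, 2):
            pairs.append((a, b, True))
        # a fixed number of different-label pairs per class
        others = [img for other, imgs in images_by_label.items()
                  if other != label for img in imgs]
        for _ in range(num_different_label):
            pairs.append((rng.choice(images), rng.choice(others), False))
    return pairs
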
def classify(dataset_location='classification/sample_data/imgs_recycling/',
             dataset_name='recycling', do_augmentations=True,
             enable_logging=True):
    """
      a sample pipeline for classification.

      loads a dataset, optionally does some augmentations, creates and trains a
      model using transfer learning based on ResNet18, and returns the accuracy
      on a test set.

      Args:
        dataset_location: path to a dataset.  there should be a top level folder
          containing folders for each class.  see the sample recycling dataset for
          an example of the format
        dataset_name: the name of the dataset.  will be used in the dataset
          management functionality
        do_augmentations: boolean.  specifies whether augmentations should be
          applied to the training set

      Returns:
        the accuracy on the test set
    """

    # if we're not running inside AML WB, set up the share directory
    if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'

    # create a dataset from a directory with folders representing different classes
    dataset = ClassificationDataset.create_from_dir(dataset_name,
                                                    dataset_location)

    # print out some info about the dataset
    print("DATASET INFO:")
    dataset.print_info()

    # split the full dataset into a train and test set
    # the stratify option will ensure that the different labels are balanced in the
    # train and test sets
    train_set_orig, test_set = dataset.split(train_size=0.66, stratify="label")

    # optionally augment images by cropping and rotating
    if do_augmentations:
        aug_sequence = augmenters.Sequential([
            augmenters.Fliplr(0.5),             # horizontally flip 50% of all images
            augmenters.Crop(percent=(0, 0.1))  # crop images by 0-10% of their height/width
        ])
        train_set = augment_dataset(train_set_orig, [aug_sequence])
        print("Number of original training images = {}, with augmented images included = {}.".format(train_set_orig.size(), train_set.size()))
    else:
        train_set = train_set_orig

    # model creation
    lr_per_mb = [0.05]*7 + [0.005]*7 + [0.0005]
    mb_size = 32
    input_resolution = 224
    base_model_name = 'ResNet18_ImageNet_CNTK'
    model = CNTKTLModel(train_set.labels,
                        base_model_name=base_model_name,
                        image_dims=(3, input_resolution, input_resolution))

    # train the model using cntk
    model.train(train_set, lr_per_mb=lr_per_mb, mb_size=mb_size)

    # evaluate on the held-out test set
    ce = ClassificationEvaluation(model, test_set, minibatch_size=mb_size)

    acc = ce.compute_accuracy()
    print("Accuracy = {:2.2f}%".format(100*acc))
    cm = ce.compute_confusion_matrix()
    print("Confusion matrix = \n{}".format(cm))

    cm_ui = ConfusionMatrixUI(cm, [l.name for l in test_set.labels])
    show(cm_ui.ui)

    return acc
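
# the confusion matrix printed above is assumed to be a square array whose rows
# are indexed by true label and whose columns by predicted label; under that
# assumption, a per-class accuracy can be read off the diagonal.  minimal sketch:
import numpy as np

def per_class_accuracy(cm):
    # divide each diagonal entry (correctly classified images) by its row sum
    # (all images with that true label); returns one accuracy value per class
    cm = np.asarray(cm, dtype=float)
    return np.diag(cm) / cm.sum(axis=1)
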
def create_dataset_from_json():

    file_labels = "C:\\Users\\miprasad\\Downloads\\cvp-1.0.0b2-release5\\cvp-1.0.0b2-release\\cvp_project\\classification\\scripts\\file_labels.json"

    dataset = ClassificationDataset.create_from_json("recycling", file_labels, context=None)
    dataset.print_info()