Example 1
def prepare():
    ws = None
    try:
        print("Connecting to workspace '%s'..." % workspace_name)
        ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name)
    except Exception:
        print("Workspace not accessible.")
    print(ws.get_details())

    ws.write_config()

    #
    # Register an existing datastore to the workspace.
    #
    if datastore_name not in ws.datastores:
        Datastore.register_azure_blob_container(
            workspace=ws,
            datastore_name=datastore_name,
            container_name=blob_container_name,
            account_name=blob_account_name,
            account_key=blob_account_key
        )
        print("Datastore '%s' registered." % datastore_name)
    else:
        print("Datastore '%s' has already been regsitered." % datastore_name)
Example 2
 def register_datastore(self, datastore_name, blob_container,
                        storage_acct_name, storage_acct_key):
     Datastore.register_azure_blob_container(workspace=self.workspace,
                                             datastore_name=datastore_name,
                                             container_name=blob_container,
                                             account_name=storage_acct_name,
                                             account_key=storage_acct_key)
Example 3
def get_datastore(ws: Workspace, datastore_name: str, container: str,
                  account_name: str, account_key: str) -> Datastore:
    if datastore_name not in ws.datastores:
        Datastore.register_azure_blob_container(workspace=ws,
                                                datastore_name=datastore_name,
                                                container_name=container,
                                                account_name=account_name,
                                                account_key=account_key,
                                                create_if_not_exists=True)

    return ws.datastores[datastore_name]
Example 4
def register_datastore(workspace, ds_config):
  ds_name = ds_config.get("name")

  if not is_datastore_exists(workspace, ds_name):
    Datastore.register_azure_blob_container(
      workspace=workspace,
      datastore_name=ds_name,
      account_name=ds_config.get("account_name"),
      container_name=ds_config.get("container_name"),
      account_key=ds_config.get("account_key"),
      create_if_not_exists=ds_config.get("create_if_not_exists")
    )
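Example 4 calls an is_datastore_exists helper that is not shown; a minimal sketch of what such a helper might look like, assuming it simply checks the workspace's registered datastores by name:

def is_datastore_exists(workspace, datastore_name):
    # Registered datastores are exposed as a name-keyed mapping on the workspace.
    return datastore_name in workspace.datastores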
Example 5
def _create_datastore(
    aml_workspace,
    datastore_name,
    container_name,
    account_name,
    account_key,
    create_if_not_exists=True,
):
    """Creates datastore

    Args:
        aml_workspace (azureml.core.Workspace): Workspace to register the datastore in.
        datastore_name (string): Name you wish to assign to your datastore.
        container_name (string): Name of your container.
        account_name (string): Storage account name.
        account_key (string): The storage account key.
        create_if_not_exists (bool): Create the blob container if it does not already exist.

    Returns:
        azureml.core.Datastore
    """
    logger = logging.getLogger(__name__)
    ds = Datastore.register_azure_blob_container(
        workspace=aml_workspace,
        datastore_name=datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        create_if_not_exists=create_if_not_exists,
    )
    logger.info(f"Registered existing blob storage: {ds.name}.")
    return ds
Example 6
    def _setup_datastore(self, blob_dataset_name, output_path=None):
        """
        Sets up the datastore in Azure ML: either retrieves a pre-existing datastore
        or registers a new one in the workspace.

        :param str blob_dataset_name: [required] name of the datastore registered with the
                                 workspace. If the datastore does not yet exist, the
                                 name it will be registered under.
        :param str output_path: [optional] if registering a datastore for inferencing,
                                the output path for writing back predictions.
        """
        try:
            self.blob_ds = Datastore.get(self.ws, blob_dataset_name)
            print("Found Blob Datastore with name: %s" % blob_dataset_name)
        except HttpOperationError:
            self.blob_ds = Datastore.register_azure_blob_container(
                workspace=self.ws,
                datastore_name=blob_dataset_name,
                account_name=self.account_name,
                container_name=self.container_name,
                account_key=self.account_key,
                subscription_id=self.blob_sub_id,
            )

            print("Registered blob datastore with name: %s" %
                  blob_dataset_name)
        if output_path is not None:
            self.output_dir = PipelineData(
                name="output",
                datastore=self.ws.get_default_datastore(),
                output_path_on_compute=output_path)
Example 7
def register_blob_datastore(
    workspace: Workspace,
    blob_datastore_name: str,
    container_name: str,
    account_name: str,
    account_key: str,
    datastore_rg: str,
) -> AzureBlobDatastore:
    """
    Register a Blob Storage Account with the Azure Machine Learning Workspace

    :param workspace: Azure Machine Learning Workspace
    :param blob_datastore_name: Name for blob datastore
    :param container_name: Name for blob container
    :param account_name: Name for blob account
    :param account_key: Blob account key used for auth
    :param datastore_rg: Resource Group containing Azure Storage Account
    :return: Pointer to Azure Machine Learning Blob Datastore
    """
    return Datastore.register_azure_blob_container(
        workspace=workspace,
        datastore_name=blob_datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        resource_group=datastore_rg,
        overwrite=True,
    )
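A hypothetical call site for register_blob_datastore; the workspace object and storage details below are placeholders, not values from the original example:

blob_datastore = register_blob_datastore(
    workspace=ws,
    blob_datastore_name="training_datastore",
    container_name="training-data",
    account_name="<storage-account-name>",
    account_key="<storage-account-key>",
    datastore_rg="<storage-resource-group>",
)

# The returned AzureBlobDatastore can then be used like any other datastore,
# for example to upload local files into the container.
blob_datastore.upload(src_dir="./data", target_path="data", overwrite=True)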
Example 8
def get_or_create_datastore(
    datastorename: str, ws: Workspace, env: Env, input: bool = True
) -> Datastore:
    """
    Obtains a datastore with matching name. Creates it if none exists.

    :param datastorename: Name of the datastore
    :param ws: Current AML Workspace
    :param env: Environment variables
    :param input: Datastore points to the input container if
    this is True (default) or the output storage container otherwise

    :returns: Datastore

    :raises: ValueError
    """
    if datastorename is None:
        raise ValueError("Datastore name is required.")

    containername = (
        env.scoring_datastore_input_container
        if input
        else env.scoring_datastore_output_container
    )

    if datastorename in ws.datastores:

        datastore = ws.datastores[datastorename]

    # the datastore is not registered but we have all details to register it
    elif (
        env.scoring_datastore_access_key is not None
        and containername is not None  # NOQA: E501
    ):  # NOQA:E501

        datastore = Datastore.register_azure_blob_container(
            workspace=ws,
            datastore_name=datastorename,
            account_name=env.scoring_datastore_storage_name,
            account_key=env.scoring_datastore_access_key,
            container_name=containername,
        )
    else:
        raise ValueError(
            "No existing datastore named {} nor was enough information supplied to create one.".format(  # NOQA: E501
                datastorename
            )
        )

    return datastore
Example 9
def register_blob_ws(ws, ds_name, container_name):
    """
    Register blob storage as datastore in workspace
    :param ws: azureml Workspace instance
    :param ds_name: name under which to register the datastore
    :param container_name: name of the blob container
    :return: registered Datastore
    """
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=ds_name,
        container_name=container_name,
        account_name=os.environ.get('ACCOUNT_NAME'),
        account_key=os.environ.get('ACCOUNT_ACCESS_KEY'),
        create_if_not_exists=True)
    return ds
Example 10
def _create_datastore(
    aml_workspace,
    datastore_name,
    container_name,
    account_name,
    account_key,
    create_if_not_exists=True,
):
    ds = Datastore.register_azure_blob_container(
        workspace=aml_workspace,
        datastore_name=datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        create_if_not_exists=create_if_not_exists,
    )
    return ds
Example 11
def get_datastore():
    env = EnvironmentVariables()
    datastore_name = env.datastore_name
    storage_account_name = env.storage_account_name
    storage_container_name = env.storage_container_name
    storage_account_key = env.storage_account_key
    workspace = get_workspace()

    try:
        datastore = Datastore.get(workspace=workspace, datastore_name=datastore_name)
    except HttpOperationError:
        datastore = Datastore.register_azure_blob_container(
            workspace=workspace,
            datastore_name=datastore_name,
            account_name=storage_account_name,
            container_name=storage_container_name,
            account_key=storage_account_key)

    return datastore
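Examples 6, 11, 16, and 29 catch HttpOperationError when Datastore.get fails for a name that has not been registered yet; in the Azure ML sample code this exception is typically imported from msrest:

from msrest.exceptions import HttpOperationError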
Example 12
    def mount(self, storage_name, storage_key, container):

        ws = Workspace(subscription_id, resource_group,
                       ws_name)  # , auth=svc_pr)

        from azureml.core import Datastore
        datastore = Datastore.register_azure_blob_container(
            workspace=ws,
            datastore_name=container,
            container_name=container,
            account_name=storage_name,
            account_key=storage_key,
            create_if_not_exists=True)

        console.print("datastore=", datastore)

        dataref = datastore.as_mount()
        dir_name = dataref.path_on_compute
        console.print("daatastore MOUNT dir_name=", dir_name)
        return dir_name
Example 13
def get_or_create_datastore(datastore_name, env, workspace, input=True):
    if datastore_name is None:
        raise ValueError('datastore name can not be empty!')

    container_name = env.scoring_datastore_input_container if input else env.scoring_datastore_output_container
    if datastore_name in workspace.datastores:
        datastore = workspace.datastores[datastore_name]
    elif container_name is not None and env.scoring_datastore_access_key is not None:
        datastore = Datastore.register_azure_blob_container(
            workspace=workspace,
            datastore_name=datastore_name,
            container_name=container_name,
            account_name=env.scoring_datastore_storage_name,
            account_key=env.scoring_datastore_access_key)
    else:
        raise ValueError(
            'no existing datastore with this name in the current workspace, nor enough info provided to register a new one'
        )

    return datastore
Example 14
def run_azure_experiment_with_storage(
    subscription_id: str,
    resource_group: str,
    workspace_name: str,
    datastore_name: str,
    container_name: str,
    storage_account_name: str,
    storage_account_key: str,
    compute_name: str,
    experiment_name: Optional[str] = None,
    source_directory: Optional[str] = None,
    image_name: Optional[str] = None,
    use_gpu=True,
) -> Run:
    workspace = Workspace(subscription_id, resource_group, workspace_name,)
    data_store = Datastore.register_azure_blob_container(
        workspace=workspace,
        datastore_name=datastore_name,
        container_name=container_name,
        account_name=storage_account_name,
        account_key=storage_account_key,
    )
    source_directory = source_directory or dirname(__file__)
    assert (
        compute_name in workspace.compute_targets
    ), f"compute {compute_name} is not created in {workspace_name} workspace"
    estimator = Estimator(
        source_directory=source_directory,
        script_params={"--data-folder": data_store.as_mount()},
        compute_target=workspace.compute_targets[compute_name],
        pip_packages=pip_packages(),
        entry_script=os.path.join(source_directory, "azure_train.py"),
        use_gpu=use_gpu,
        custom_docker_image=image_name,
    )
    experiment_name = experiment_name or __file__.split(os.sep)[-1].split(".py")[0]
    experiment = Experiment(workspace=workspace, name=experiment_name)
    run = experiment.submit(estimator)
    return run
Example 15
def register_blob_datastore(subscription_id,
                            resource_group,
                            workspace,
                            datastore_name,
                            container_name,
                            account_name,
                            account_key,
                            set_as_default=True):

    datastore = Datastore.register_azure_blob_container(
        workspace=workspace,
        datastore_name=datastore_name,
        grant_workspace_access=True,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        subscription_id=subscription_id,
        resource_group=resource_group)
    if set_as_default:
        datastore.set_as_default()

    return datastore
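Because set_as_default defaults to True, the registered datastore also becomes the workspace default; a hypothetical call site (all values below are placeholders) followed by a check of the default datastore:

datastore = register_blob_datastore(subscription_id="<subscription-id>",
                                    resource_group="<resource-group>",
                                    workspace=ws,
                                    datastore_name="training_datastore",
                                    container_name="training-data",
                                    account_name="<storage-account-name>",
                                    account_key="<storage-account-key>")

# The workspace default datastore now points at the newly registered one.
print(ws.get_default_datastore().name)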
Example 16
    def _load(self) -> np.ndarray:
        """Loads data from the image file.

        Returns:
            Data from the image file as a numpy array.
        """
        # Initialise Workspace

        ws = Workspace.from_config()

        blob_datastore_name = self._credentials['storage_name']
        account_name        = self._credentials['storage_name']   # Storage account name
        container_name      = self._credentials['container_name'] # Name of Azure blob container
        account_key         = self._credentials['key']            # Storage account key

        # Register a new datastore
        try:
            blob_datastore = Datastore.get(ws, blob_datastore_name)
            print("Found Blob Datastore with name: %s" % blob_datastore_name)

        except HttpOperationError:
            blob_datastore = Datastore.register_azure_blob_container(workspace = ws,
                                                                datastore_name = blob_datastore_name,
                                                                container_name = container_name,
                                                                account_name = account_name,
                                                                account_key = account_key)

        blob_datastore.download(target_path=self._local_path,
                                prefix=self._container_path,
                                show_progress=False)                                                    
        ...

    def _save(self, data: np.ndarray) -> None:
        """Saves image data to the specified filepath"""
        ...

    def _describe(self) -> Dict[str, Any]:
        
        """Returns a dict that describes the attributes of the dataset"""
Example 17
workspace = Workspace.from_config()
print('Workspace name: ' + workspace.name, 
      'Azure region: ' + workspace.location, 
      'Subscription id: ' + workspace.subscription_id, 
      'Resource group: ' + workspace.resource_group, sep = '\n')
# -

from model import TFBertForMultiClassification
from transformers import BertTokenizer
import tensorflow as tf

datastore_config = json.loads(open('datastore.json').read())
datastore = Datastore.register_azure_blob_container(workspace=workspace, 
                                                    datastore_name=datastore_config['datastore_name'], 
                                                    container_name=datastore_config['container_name'],
                                                    account_name=datastore_config['account_name'], 
                                                    sas_token=datastore_config['sas_token'])

# If you haven't finished training the model, just download the pre-made model from the datastore
datastore.download('./', prefix="azure-service-classifier/model")

def encode_example(text, max_seq_length):
    # Encode inputs using tokenizer
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_seq_length
        )
    input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]
    # The mask has 1 for real tokens and 0 for padding tokens. Only real tokens are attended to.
Example 18
try:
    ct = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing cluster '%s'. Skip." % cluster_name)
except ComputeTargetException:
    print("Creating new cluster '%s'..." % cluster_name)
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_NC6", min_nodes=0, max_nodes=1)
    ct = ComputeTarget.create(ws, cluster_name, compute_config)
    ct.wait_for_completion(show_output=True)
print(ct.get_status().serialize())

#
# Register an existing datastore to the workspace.
#

datastore_name = "hellotfstore"
if datastore_name not in ws.datastores:
    Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=datastore_name,
        container_name="hello-tf",
        account_name="wuhamltestsa",
        account_key=
        "LBpyUOlJT/wbiHQReiwY1EB3WhDF3Sn2STia4UY//SkMWerh08M0QjhImmQ8TwCrmvDfq0tVtB3xF9mxZFiMXA=="
    )
    print("Datastore '%s' registered." % datastore_name)
else:
    print("Datastore '%s' has already been regsitered." % datastore_name)

# (END)
Example 19
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#connecting to the workspace
from azureml.core import Workspace, Datastore
ws = Workspace.from_config()

#Registering a new datastore

# to register a datastore we need the name and key of the storage account that holds the container backing it
blob_ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="blob_data",
    container_name='azureml-blobstore-bf4e0c62-87d2-4233-920c-6870aa62cfc0',
    account_name='rishabhmachine5989301776',
    account_key=
    'OCToPz0m8zQBNxIUL01aZDyhHDGK3fDuMXCE0NV/e28UW89q9YWfZimujAeGMS4dvGSOEbHE5YYFmZUFRrXaeA=='
)

#let's check all the datastores in the workspace

for ds_name in ws.datastores:
    print(ds_name)

#get a reference to any datastore

blob_store = Datastore.get(ws, datastore_name="blob_data")

#to get the default datastore
default_store = ws.get_default_datastore()
Example 20
def main(args):
    logging.info('Main started.')

    # Define workspace object
    try:
        ws = Workspace.from_config(path='config.json')
    # Need to create the workspace
    except Exception as err:
        print('No workspace.  Check for config.json file.')
        assert False
        # ws = Workspace.create(name=os.getenv('WORKSPACE_NAME', ''),
        #             subscription_id=os.getenv('AZURE_SUB', ''),
        #             resource_group=os.getenv('RESOURCE_GROUP', ''),
        #             create_resource_group=True,
        #             location='westus2'))
        # print("Created workspace {} at location {}".format(ws.name, ws.location))

    # choose a name for your cluster - under 16 characters
    cluster_name = "gpuforkeras"

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except ComputeTargetException:
        print('Creating a new compute target...')
        # AML Compute config - if max_nodes are set, it becomes persistent storage that scales
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6', min_nodes=0, max_nodes=5)
        # create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    # use get_status() to get a detailed status for the current cluster.
    # print(compute_target.get_status().serialize())

    # # Create a project directory and copy training script to it
    project_folder = os.path.join(os.getcwd(), 'project')

    # Create an experiment
    experiment_name = args.experiment_name
    experiment = Experiment(ws, name=experiment_name)

    # # Use an AML Data Store for training data
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=args.datastore_name,
        container_name=os.getenv('STORAGE_CONTAINER_NAME_TRAINDATA', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Set up for training
    script_params = {
        # --data_path is a Python object that will mount the
        #   datastore to the compute target in next step (linking
        #   to Blob Storage)
        '--data_path': ds.as_mount(),
        '--data_dir': args.data_dir,
        '--gpu_num': args.gpu_num,
        '--class_path': args.class_path,
        '--num_clusters': args.num_clusters,
        '--batch_size': args.batch_size,
        '--learning_rate': args.learning_rate
    }

    # Instantiate TensorFlow estimator to call training script
    estimator = TensorFlow(source_directory=project_folder,
                           script_params=script_params,
                           compute_target=compute_target,
                           entry_script='train_azureml.py',
                           pip_packages=[
                               'keras==2.2.4', 'matplotlib==3.1.1',
                               'opencv-python==4.1.1.26', 'Pillow', 'numpy',
                               'configparser', 'python-dotenv',
                               'tensorflow==1.13.1'
                           ],
                           use_gpu=True,
                           framework_version='1.13')

    # Submit and wait for run to complete - check experiment in Azure Portal for progress
    run = experiment.submit(estimator)
    print(run.get_details())
    run.wait_for_completion(show_output=True)

    # Register models to Workspace
    model = run.register_model(
        model_name='keras-dnn-intermediate',
        model_path='./outputs/trained_weights_intermediate.h5',
        tags={
            'framework': "Keras",
            'task': "object detection"
        },
        description="Custom Keras YOLOv3 model - before fine-tuning phase")
    model = run.register_model(
        model_name='keras-dnn',
        model_path='./outputs/trained_weights_final.h5',
        tags={
            'framework': "Keras",
            'task': "object detection"
        },
        description="Custom Keras YOLOv3 model - final, after fine-tuning phase"
    )
Example 21
        workspace_name = arg

print("Azure ML SDK Version: ", VERSION)

#### Connect to our workspace ####
##################################

# workspace
ws = Workspace.get(name=workspace_name,
                   subscription_id=subscription_id,
                   resource_group=resource_group)

# data
ds = Datastore.register_azure_blob_container(workspace=ws,
                                             datastore_name=datastorename,
                                             container_name='seer-container',
                                             account_name=storage_account,
                                             account_key=storage_account_key,
                                             create_if_not_exists=True)
datastore = ws.datastores[datastorename]

# compute target
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=computetarget)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC12', min_nodes=1, max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, computetarget, compute_config)

cpu_cluster.wait_for_completion(show_output=True)
Example 22
def main(req: func.HttpRequest) -> (func.HttpResponse):
    logging.info('Python HTTP trigger function processed a request.')

    # For now this can be a POST where we have <base url>/api/HttpTrigger?start=<any string>
    image_url = req.params.get('start')
    logging.info(type(image_url))

    # Write a config.json (fill in template values with system vars)
    config_temp = {
        'subscription_id': os.getenv('AZURE_SUB', ''),
        'resource_group': os.getenv('RESOURCE_GROUP', ''),
        'workspace_name': os.getenv('WORKSPACE_NAME', '')
    }
    with open(os.path.join(os.getcwd(), 'HttpTrigger', 'config.json'),
              'w') as f:
        json.dump(config_temp, f)

    # Get the workspace from config.json
    try:
        ws = Workspace.from_config(
            os.path.join(os.getcwd(), 'HttpTrigger', 'config.json'))
    # Authentication didn't work
    except ProjectSystemException as err:
        return json.dumps('ProjectSystemException')
    # Need to create the workspace
    except Exception as err:
        ws = Workspace.create(
            name=os.getenv('WORKSPACE_NAME', ''),
            subscription_id=os.getenv('AZURE_SUB', ''),
            resource_group=os.getenv('RESOURCE_GROUP', ''),
            create_resource_group=True,
            location='eastus2'  # Or other supported Azure region   
        )

    # choose a name for your cluster
    cluster_name = "gpuclusterplease"

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except ComputeTargetException:
        print('Creating a new compute target...')
        # AML Compute config - if max_nodes are set, it becomes persistent storage that scales
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6', max_nodes=4)
        # create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    # use get_status() to get a detailed status for the current cluster.
    print(compute_target.get_status().serialize())

    # Create a project directory and copy training script to it
    project_folder = os.path.join(os.getcwd(), 'HttpTrigger', 'project')
    os.makedirs(project_folder, exist_ok=True)
    shutil.copy(os.path.join(os.getcwd(), 'HttpTrigger', 'pytorch_train.py'),
                project_folder)

    # Create an experiment
    experiment_name = 'fish-no-fish'
    experiment = Experiment(ws, name=experiment_name)

    # Use an AML Data Store for training data
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='funcdefaultdatastore',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_TRAINDATA', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Use an AML Data Store to save models back up to
    ds_models = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='modelsdatastorage',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_MODELS', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Set up for training ("trans" flag means - use transfer learning and
    # this should download a model on compute)
    script_params = {
        '--data_dir': ds.as_mount(),
        '--num_epochs': 30,
        '--learning_rate': 0.01,
        '--output_dir': './outputs',
        '--trans': 'True'
    }

    # Instantiate PyTorch estimator with upload of final model to
    # a specified blob storage container (this can be anything)
    estimator = PyTorch(
        source_directory=project_folder,
        script_params=script_params,
        compute_target=compute_target,
        entry_script='pytorch_train.py',
        use_gpu=True,
        inputs=[
            ds_models.as_upload(
                path_on_compute='./outputs/model_finetuned.pth')
        ])

    run = experiment.submit(estimator)
    run.wait_for_completion(show_output=True)

    return json.dumps('Job complete')
Example 23
def launch_experiment(ws, conf_aml, conf_cluster, conf_docker,
                      conf_experiment):

    # Register the input data blob container
    input_ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='petridishdata',
        container_name='datasets',
        account_name='petridishdata',
        account_key=conf_aml['azure_storage_account_key'],
        create_if_not_exists=False)

    output_ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='petridishoutput',
        container_name='amloutput',
        account_name='petridishdata',
        account_key=conf_aml['azure_storage_account_key'],
        create_if_not_exists=False)

    # Create or attach compute cluster
    cluster_name = conf_cluster['cluster_name']

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except ComputeTargetException:
        print('Creating a new compute target...')
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=conf_cluster['vm_size'],
            max_nodes=conf_cluster['max_nodes'],
            vm_priority=conf_cluster['vm_priority'],
            idle_seconds_before_scaledown=conf_cluster[
                'idle_seconds_before_scaledown'])

        # Create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    # use get_status() to get a detailed status for the current cluster.
    print(compute_target.get_status().serialize())

    # Set project directory
    # Assuming running in extract_features_from_videos folder
    project_folder = '../../'

    # Setup custom docker usage
    image_registry_details = ContainerRegistry()
    image_registry_details.address = conf_docker['image_registry_address']
    image_registry_details.username = conf_docker['image_registry_username']
    image_registry_details.password = conf_docker['image_registry_password']

    # don't let the system build a new conda environment
    user_managed_dependencies = True

    # Note that experiment names have to be
    # <36 alphanumeric characters
    exp_name = conf_experiment['experiment_name']

    experiment = Experiment(ws, name=exp_name)
    script_params = {
        '--nas.eval.loader.dataset.dataroot': input_ds.path('/').as_mount(),
        '--nas.search.loader.dataset.dataroot': input_ds.path('/').as_mount(),
        '--common.logdir': output_ds.path('/').as_mount(),
    }

    est = Estimator(source_directory=project_folder,
                    script_params=script_params,
                    compute_target=compute_target,
                    entry_script='scripts/main.py',
                    custom_docker_image=conf_docker['image_name'],
                    image_registry_details=image_registry_details,
                    user_managed=user_managed_dependencies,
                    source_directory_data_store=input_ds)

    run = experiment.submit(est)
Example 24
datastores = ws.datastores
for name, ds in datastores.items():
    print(name, ds.datastore_type)
#ws.set_default_datastore('chexrayds')

# In[48]:

with open(os.path.join(notshared_dir, 'credentials.json')) as creds:
    credentials = json.load(creds)

#print(credentials)
ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name=credentials['datastore_name'],
    container_name=credentials['container_name'],
    account_name=credentials['account_name'],
    account_key=credentials['account_key'],
    create_if_not_exists=False)

# In[49]:

ds = Datastore.get(ws, datastore_name='chexrayds')
print(ds.name)

# In[ ]:

# In[50]:

import azureml.data
from azureml.data.data_reference import DataReference
# In[ ]:


Example 25
def_blob_store.upload_files(
    ["./data/20news.pkl"],
    target_path="20newsgroups",
    overwrite=True)


# In[129]:


datastore = Datastore.register_azure_blob_container(workspace=ws, 
                                                    datastore_name='your', 
                                                    account_name='lokeshdata',
                                                    container_name='yourc',
                                                    account_key='L6ot0h04xROx/83/W6AymEAR7f66KuhVLKxOCm1SvcMAg70yrJv32mcY389mOoSPVyfRxuTYr3eSZpGF0WHPUg==',
                                                    subscription_id = "c3ef02ec-8e19-415f-a0d7-b562b6b78b11",
                      
                                                    create_if_not_exists=True)


# In[133]:


import azureml.data
from azureml.data.azure_storage_datastore import AzureFileDatastore, AzureBlobDatastore


# In[ ]:

Example 26
def main(req: func.HttpRequest) -> (func.HttpResponse):
    logging.info('Python HTTP trigger function processed a request.')

    # For now this can be a POST where we have <base url>/api/HttpTrigger?start=<any string>
    image_url = req.params.get('start')
    logging.info(type(image_url))

    # Use service principal secrets to create authentication vehicle and 
    # define workspace object
    try:    
        svc_pr = ServicePrincipalAuthentication(
            tenant_id=os.getenv('TENANT_ID', ''),
            service_principal_id=os.getenv('APP_ID', ''),
            service_principal_password=os.getenv('PRINCIPAL_PASSWORD', ''))

        ws = Workspace(subscription_id=os.getenv('AZURE_SUB', ''),
                    resource_group=os.getenv('RESOURCE_GROUP', ''),
                    workspace_name=os.getenv('WORKSPACE_NAME',''),
                    auth=svc_pr)
        print("Found workspace {} at location {} using Azure CLI \
            authentication".format(ws.name, ws.location))
    # Usually because authentication didn't work
    except ProjectSystemException as err:
        print('Authentication did not work.')
        return json.dumps('ProjectSystemException')
    # Need to create the workspace
    except Exception as err:
        ws = Workspace.create(name=os.getenv('WORKSPACE_NAME', ''),
                    subscription_id=os.getenv('AZURE_SUB', ''), 
                    resource_group=os.getenv('RESOURCE_GROUP', ''),
                    create_resource_group=True,
                    location='westus', # Or other supported Azure region   
                    auth=svc_pr)
        print("Created workspace {} at location {}".format(ws.name, ws.location))

       

    # choose a name for your cluster - under 16 characters
    cluster_name = "gpuforpytorch"

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except ComputeTargetException:
        print('Creating a new compute target...')
        # AML Compute config - if max_nodes are set, it becomes persistent storage that scales
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',
                                                            min_nodes=0,
                                                            max_nodes=2)
        # create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    # use get_status() to get a detailed status for the current cluster. 
    # print(compute_target.get_status().serialize())

    # # Create a project directory and copy training script to it
    project_folder = os.path.join(os.getcwd(), 'HttpTrigger', 'project')
    # os.makedirs(project_folder, exist_ok=True)
    # shutil.copy(os.path.join(os.getcwd(), 'HttpTrigger', 'pytorch_train.py'), project_folder)

    # Create an experiment
    experiment_name = 'fish-no-fish'
    experiment = Experiment(ws, name=experiment_name)

    # Use an AML Data Store for training data
    ds = Datastore.register_azure_blob_container(workspace=ws, 
        datastore_name='funcdefaultdatastore', 
        container_name=os.getenv('STORAGE_CONTAINER_NAME_TRAINDATA', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''), 
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Use an AML Data Store to save models back up to
    ds_models = Datastore.register_azure_blob_container(workspace=ws, 
        datastore_name='modelsdatastorage', 
        container_name=os.getenv('STORAGE_CONTAINER_NAME_MODELS', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''), 
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Set up for training ("trans" flag means - use transfer learning and 
    # this should download a model on compute)
    # Using /tmp to store model and info due to the fact that
    # creating new folders and files on the Azure Function host
    # will trigger the function to restart.
    script_params = {
        '--data_dir': ds.as_mount(),
        '--num_epochs': 30,
        '--learning_rate': 0.01,
        '--output_dir': '/tmp/outputs',
        '--trans': 'True'
    }

    # Instantiate PyTorch estimator with upload of final model to
    # a specified blob storage container (this can be anything)
    estimator = PyTorch(source_directory=project_folder, 
                        script_params=script_params,
                        compute_target=compute_target,
                        entry_script='pytorch_train.py',
                        use_gpu=True,
                        inputs=[ds_models.as_upload(path_on_compute='./outputs/model_finetuned.pth')])

    run = experiment.submit(estimator)
    print(run.get_details())
    
    # # The following would certainly be blocking, but that's ok for debugging
    # while run.get_status() not in ['Completed', 'Failed']: # For example purposes only, not exhaustive
    #    print('Run {} not in terminal state'.format(run.id))
    #    time.sleep(10)

    return json.dumps(run.get_status())
Example 27
RESOURCE_GROUP = os.environ.get("RESOURCE_GROUP_NAME")
UPDATE_DATA = os.environ.get("UPDATE_DATA")
STORAGE_NAME = os.environ.get("STORAGE_NAME")
EPIS_CONTAINER = os.environ.get("EPIS_CONTAINER")
EPIS_DATASTORE = os.environ.get("EPIS_DATASTORE")
STORAGE_ACCOUNT_KEY = os.environ.get("STORAGE_ACCOUNT_KEY")

SP_AUTH = ServicePrincipalAuthentication(
    tenant_id=TENANT_ID,
    service_principal_id=APP_ID,
    service_principal_password=APP_SECRET)

WORKSPACE = Workspace.get(
    WORKSPACE_NAME,
    SP_AUTH,
    SUBSCRIPTION_ID,
    RESOURCE_GROUP
)

try:
    if UPDATE_DATA:
        Datastore.register_azure_blob_container(WORKSPACE, EPIS_DATASTORE, 
                                                EPIS_CONTAINER, STORAGE_NAME, sas_token=None, 
                                                account_key=STORAGE_ACCOUNT_KEY, protocol=None, endpoint=None, 
                                                overwrite=True, create_if_not_exists=True,
                                                subscription_id=SUBSCRIPTION_ID, resource_group=RESOURCE_GROUP)
        print("Dataset EPIS registered")
    print("Dataset EPIS registered successfully")
except Exception as caught_error:
    print("Error while registering the dataset on datastore: " + str(caught_error))
    sys.exit(1)
Example 28
ds = ws.get_default_datastore()

# Upload local "data" folder (incl. files) as "tfdata" folder
ds.upload(src_dir='./data', target_path='tfdata', overwrite=True)

#%% [markdown]
# ## Use your own blob storage
#
# You can also use your own blob storage. Set your previously generated storage account name, key, and container.

#%%
from azureml.core import Datastore

ds = Datastore.register_azure_blob_container(ws,
                                             datastore_name='myblob01',
                                             account_name='amltest01',
                                             account_key='BAYcnjJ/TK...',
                                             container_name='container01',
                                             overwrite=True)

# Upload local "data" folder (incl. files) as "tfdata" folder
ds.upload(src_dir='./data', target_path='tfdata', overwrite=True)

#%% [markdown]
# Get the generated Datastore, and upload again.

#%%
# Get your own registered datastore
ds = Datastore.get(ws, datastore_name='myblob01')

# Upload local "data" folder (incl. files) as "tfdata" folder
ds.upload(src_dir='./data', target_path='tfdata', overwrite=True)
Example 29
blob_datastore_name = env.blob_datastore_name
# Name of Azure blob container
container_name = env.blob_container_name
# Storage account name
account_name = env.storage_account_name
# Storage account access key
account_key = env.storage_account_key

# Check whether the blob datastore is already registered; reuse it if so
try:
    blob_datastore = Datastore.get(aml_workspace, blob_datastore_name)
    print('Found existing datastore, use it.')
except HttpOperationError:
    blob_datastore = Datastore.register_azure_blob_container(
        workspace=aml_workspace,
        datastore_name=blob_datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key)
    print("Registered blob datastore with name: %s" % blob_datastore_name)

# Register dataset without creating new version
input_datastore_paths = [DataPath(blob_datastore, env.input_dataset_name)]
input_dataset = Dataset.File.from_files(path=input_datastore_paths)
input_dataset = input_dataset.register(workspace=aml_workspace,
                                       name=env.input_dataset_name,
                                       description=env.input_dataset_name)
print("Registered dataset: %s" % input_dataset.name)

waves_datastore_paths = [DataPath(blob_datastore, env.waves_dataset_name)]
waves_dataset = Dataset.File.from_files(path=waves_datastore_paths)
waves_dataset = waves_dataset.register(workspace=aml_workspace,
                                       name=env.waves_dataset_name,
                                       description=env.waves_dataset_name)
Example 30
    compute_config = AmlCompute.provisioning_configuration(
        min_nodes=0,
        max_nodes=1,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)

old_datastore = [ds for ds in ws.datastores if ds == "telemetry"]
if old_datastore:
    old_ds = Datastore.get(ws, "telemetry")
    old_ds.unregister()

telemetry_ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name='telemetry',
    container_name=args.storage_container,
    account_name=args.storage_account,
    account_key=args.storage_key,
)

input_data = DataReference(
    datastore=telemetry_ds,
    data_reference_name="input_data",
    path_on_datastore=args.storage_path,
)

preprocessing_est = SKLearn(
    source_directory='010-preprocessing',
    compute_target=cpu_cluster,
    entry_script='dataprep.py',
    conda_packages=['pandas'],