Esempio n. 1
0
    def create_file_server(client,
                           location,
                           resource_group,
                           nfs_name,
                           subnet_id=None):
        """Create an NFS file server in the default workspace.

        The default workspace is created first; ``.result()`` is called on the
        value returned by each create call before moving on.

        :param BatchAIManagementClient client: client instance.
        :param str location: location.
        :param str resource_group: resource group name.
        :param str nfs_name: file server name.
        :param models.ResourceId subnet_id: id of the subnet.
        :return models.FileServer: created file server.
        """
        workspace_name = Helpers.DEFAULT_WORKSPACE_NAME
        # The file server is created inside this workspace.
        client.workspaces.create(resource_group, workspace_name,
                                 location).result()

        # Admin account used for ssh access to the file server.
        ssh_settings = models.SshConfiguration(
            user_account_settings=models.UserAccountSettings(
                admin_user_name=Helpers.ADMIN_USER_NAME,
                admin_user_password=Helpers.ADMIN_USER_PASSWORD,
            ))
        # Two 10 GB Standard_LRS data disks.
        disks = models.DataDisks(
            disk_size_in_gb=10,
            disk_count=2,
            storage_account_type='Standard_LRS')
        parameters = models.FileServerCreateParameters(
            vm_size='STANDARD_D1',
            ssh_configuration=ssh_settings,
            data_disks=disks,
            subnet=subnet_id)

        poller = client.file_servers.create(
            resource_group, workspace_name, nfs_name, parameters)
        return poller.result()
def prepare_batch_ai_workspace(client, service, config):
    """Create the Batch AI workspace and a cluster with a mounted file share.

    :param client: Batch AI management client; its ``workspaces`` and
        ``clusters`` operation groups are used.
    :param service: object whose ``primary_endpoint`` is used as the host part
        of the Azure file share URL (presumably a storage file service --
        confirm against the caller).
    :param config: settings object providing the workspace, cluster, admin
        account and storage values read below.
    """
    # Create Batch AI workspace and wait for it: the cluster below is created
    # inside this workspace, so the operation has to be finished first.
    client.workspaces.create(config.workspace_resource_group,
                             config.workspace,
                             config.location).result()

    # Create GPU cluster
    parameters = models.ClusterCreateParameters(
        # VM size. Use N-series for GPU
        vm_size=config.workspace_vm_size,
        # Configure the ssh users
        user_account_settings=models.UserAccountSettings(
            admin_user_name=config.admin,
            admin_user_password=config.admin_password),
        # Number of VMs in the cluster
        scale_settings=models.ScaleSettings(
            manual=models.ManualScaleSettings(
                target_node_count=config.workspace_node_count)
        ),
        # Configure each node in the cluster
        node_setup=models.NodeSetup(
            # Mount shared volumes to the host
            mount_volumes=models.MountVolumes(
                azure_file_shares=[
                    models.AzureFileShareReference(
                        account_name=config.storage_account_name,
                        credentials=models.AzureStorageCredentialsInfo(
                            account_key=config.storage_account_key),
                        azure_file_url='https://{0}/{1}'.format(
                            service.primary_endpoint,
                            config.workspace_file_share),
                        relative_mount_path=config.workspace_relative_mount_path)],
            ),
        ),
    )
    client.clusters.create(config.workspace_resource_group,
                           config.workspace,
                           config.workspace_cluster,
                           parameters).result()
Esempio n. 3
0
 def test_experiments_isolation(self, resource_group, location):
     """Deleting one experiment must leave every other resource untouched.

     Two workspaces each get a cluster, two experiments and one job per
     experiment. ``exp1`` of the first workspace is then deleted and the
     remaining clusters, experiments and jobs are checked.
     """
     group = resource_group.name
     workspaces = ['first', 'second']
     experiments = ['exp1', 'exp2']

     for workspace in workspaces:
         self.client.workspaces.create(group, workspace, location).result()

     # Populate each workspace: a cluster, two experiments, a job in each
     for workspace in workspaces:
         cluster_parameters = models.ClusterCreateParameters(
             vm_size='STANDARD_D1',
             scale_settings=models.ScaleSettings(
                 manual=models.ManualScaleSettings(target_node_count=0)),
             user_account_settings=models.UserAccountSettings(
                 admin_user_name=helpers.ADMIN_USER_NAME,
                 admin_user_password=helpers.ADMIN_USER_PASSWORD),
             vm_priority='lowpriority')
         cluster = self.client.clusters.create(
             group, workspace, 'cluster',
             parameters=cluster_parameters).result()
         for experiment in experiments:
             self.client.experiments.create(group, workspace,
                                            experiment).result()
             job_parameters = models.JobCreateParameters(
                 cluster=models.ResourceId(id=cluster.id),
                 node_count=1,
                 std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT',
                 custom_toolkit_settings=models.CustomToolkitSettings(
                     command_line='true'))
             self.client.jobs.create(
                 group, workspace, experiment, 'job',
                 parameters=job_parameters).result()

     # Remove exp1 from the first workspace ...
     self.client.experiments.delete(group, 'first', 'exp1').result()
     # ... and make sure it is really gone
     with self.assertRaises(CloudError):
         self.client.experiments.get(group, 'first', 'exp1')

     for workspace in workspaces:
         # The clusters must still be retrievable
         self.client.clusters.get(group, workspace, 'cluster')
         for experiment in experiments:
             if (workspace, experiment) == ('first', 'exp1'):
                 # This is the experiment that was deleted above
                 continue
             # The remaining experiments must still be retrievable
             self.client.experiments.get(group, workspace, experiment)
             job = self.client.jobs.get(group, workspace, experiment, 'job')
             # Their jobs must still be queued, i.e. not terminated
             self.assertEqual(job.execution_state,
                              models.ExecutionState.queued)
Esempio n. 4
0
def cluster_parameters_for(config, container_settings, volumes):
    """Assemble the cluster creation parameters described by ``config``.

    :param config: settings object; ``location``, ``vm_type``, ``admin_user``
        (indexed by ``'name'`` and ``'password'``) and ``node_count`` are read.
    :param container_settings: accepted but not used by this function.
    :param volumes: value passed on as ``mount_volumes`` of the node setup.
    :return models.ClusterCreateParameters: parameters for a manually scaled
        cluster using the pinned UbuntuServer 16.04-LTS image.
    """
    # Pin the exact OS image the nodes are created from.
    image = models.ImageReference(offer='UbuntuServer',
                                  publisher='Canonical',
                                  sku='16.04-LTS',
                                  version='16.04.201708151')
    vm_configuration = models.VirtualMachineConfiguration(
        image_reference=image)

    location = config.location
    vm_size = config.vm_type
    admin_account = models.UserAccountSettings(
        admin_user_name=config.admin_user['name'],
        admin_user_password=config.admin_user['password'])
    # Fixed node count taken from the config.
    manual_scale = models.ManualScaleSettings(
        target_node_count=config.node_count)
    scaling = models.ScaleSettings(manual=manual_scale)
    node_setup = models.NodeSetup(mount_volumes=volumes)

    return models.ClusterCreateParameters(
        virtual_machine_configuration=vm_configuration,
        location=location,
        vm_size=vm_size,
        user_account_settings=admin_account,
        scale_settings=scaling,
        node_setup=node_setup)
Esempio n. 5
0
def _update_user_account_settings(params, admin_user_name, ssh_key, password):
    """Update account settings of cluster or file server creation parameters

    The update is applied to a deep copy, ``params`` itself is not modified.

    :param models.ClusterCreateParameters or models.FileServerCreateParameters params: params to update
    :param str or None admin_user_name: name of admin user to create.
    :param str or None ssh_key: ssh public key value or path to the file containing the key.
    :param str or None password: password.
    :return models.ClusterCreateParameters or models.FileServerCreateParameters: updated copy of the parameters.
    :raise CLIError: if the ssh public key is not valid, if no admin user name is available, or if neither a
                     password nor an ssh key is available.
    """
    result = copy.deepcopy(params)
    # Parameters having 'user_account_settings' hold the settings directly; otherwise they are kept under
    # 'ssh_configuration', which is created on demand.
    if hasattr(result, 'user_account_settings'):
        parent = result
    else:
        if result.ssh_configuration is None:
            result.ssh_configuration = models.SshConfiguration(user_account_settings=None)
        parent = result.ssh_configuration
    if parent.user_account_settings is None:
        parent.user_account_settings = models.UserAccountSettings()
    settings = parent.user_account_settings
    # Get effective user name, password and key trying them in the following order: provided via command line,
    # provided in the config file, current user name and his default public ssh key.
    effective_user_name = admin_user_name or settings.admin_user_name or getpass.getuser()
    effective_password = password or settings.admin_user_password
    # Use default ssh public key only if no password is configured.
    effective_key = (ssh_key or settings.admin_user_ssh_public_key or
                     (None if effective_password else _get_default_ssh_public_key_location()))
    if effective_key:
        key_path = os.path.expanduser(effective_key)
        # The value may be a path to the key file. A directory is not a key file: skip it here so that it is
        # rejected by the validation below instead of failing inside open().
        if os.path.exists(key_path) and not os.path.isdir(key_path):
            with open(key_path) as f:
                effective_key = f.read()
        # Any failure of the validator itself is reported as an invalid key as well.
        try:
            key_is_valid = keys.is_valid_ssh_rsa_public_key(effective_key)
        except Exception:  # pylint: disable=broad-except
            key_is_valid = False
        if not key_is_valid:
            raise CLIError('Incorrect ssh public key value.')

    settings.admin_user_name = effective_user_name
    settings.admin_user_ssh_public_key = effective_key
    settings.admin_user_password = effective_password

    if not settings.admin_user_name:
        raise CLIError('Please provide admin user name.')

    if not settings.admin_user_ssh_public_key and not settings.admin_user_password:
        raise CLIError('Please provide admin user password or ssh key.')

    return result
Esempio n. 6
0
def _update_user_account_settings(params, admin_user_name, ssh_key, password):
    """Update account settings of cluster or file server creation parameters

    The update is applied to a deep copy, ``params`` itself is not modified. Values which are not provided keep
    whatever the parameters already contain.

    :param models.ClusterCreateParameters or models.FileServerCreateParameters params: params to update
    :param str or None admin_user_name: name of admin user to create.
    :param str or None ssh_key: ssh public key value or path to the file containing the key.
    :param str or None password: password.
    :return models.ClusterCreateParameters or models.FileServerCreateParameters: updated copy of the parameters.
    :raise CLIError: if the ssh public key is not valid, if no admin user name is available, or if neither a
                     password nor an ssh key is available.
    """
    result = copy.deepcopy(params)
    key = ssh_key
    if ssh_key:
        key_path = os.path.expanduser(ssh_key)
        # The value may be a path to the key file. A directory is not a key file: skip it here so that it is
        # rejected by the validation below instead of failing inside open().
        if os.path.exists(key_path) and not os.path.isdir(key_path):
            with open(key_path) as f:
                key = f.read()

        # Any failure of the validator itself is reported as an invalid key as well.
        try:
            key_is_valid = is_valid_ssh_rsa_public_key(key)
        except Exception:  # pylint: disable=broad-except
            key_is_valid = False
        if not key_is_valid:
            raise CLIError('Incorrect ssh public key value.')

    # Parameters having 'user_account_settings' hold the settings directly; otherwise they are kept under
    # 'ssh_configuration', which is created on demand.
    if hasattr(result, 'user_account_settings'):
        parent = result
    else:
        if result.ssh_configuration is None:
            result.ssh_configuration = models.SshConfiguration(user_account_settings=None)
        parent = result.ssh_configuration
    if parent.user_account_settings is None:
        parent.user_account_settings = models.UserAccountSettings(
            admin_user_name=admin_user_name, admin_user_ssh_public_key=key)
    settings = parent.user_account_settings
    # Override only the values which were explicitly provided.
    if admin_user_name:
        settings.admin_user_name = admin_user_name
    if key:
        settings.admin_user_ssh_public_key = key
    if password:
        settings.admin_user_password = password

    if not settings.admin_user_name:
        raise CLIError('Please provide admin user name.')

    if not settings.admin_user_ssh_public_key and not settings.admin_user_password:
        raise CLIError('Please provide admin user password or ssh key.')

    return result
Esempio n. 7
0
    def create_cluster(client,
                       location,
                       resource_group,
                       cluster_name,
                       vm_size,
                       target_nodes,
                       storage_account,
                       storage_account_key,
                       file_servers=None,
                       file_systems=None,
                       subnet_id=None,
                       setup_task_cmd=None,
                       setup_task_env=None,
                       setup_task_secrets=None):
        """Creates a cluster with given parameters and mounted Azure Files

        The file share and the default workspace are created before the
        cluster itself.

        :param BatchAIManagementClient client: client instance.
        :param str location: location.
        :param str resource_group: resource group name.
        :param str cluster_name: name of the cluster.
        :param str vm_size: vm size.
        :param int target_nodes: number of nodes.
        :param str storage_account: name of the storage account.
        :param str storage_account_key: storage account key.
        :param list(models.FileServerReference) file_servers: file servers.
        :param list(models.UnmanagedFileServerReference) file_systems: file systems.
        :param str subnet_id: virtual network subnet id.
        :param str setup_task_cmd: start task cmd line; no setup task is configured when it is empty or None.
        :param dict[str, str] or None setup_task_env: environment variables for start task.
        :param dict[str, str] or None setup_task_secrets: environment variables with secret values for start task,
                                                          server doesn't return values for these environment
                                                          variables in get cluster responses.
        :return models.Cluster: the created cluster
        """
        Helpers._create_file_share(storage_account, storage_account_key)
        setup_task = None
        if setup_task_cmd:
            # Both dictionaries default to None, a command line alone must be
            # enough to configure a setup task.
            env = setup_task_env or {}
            secrets = setup_task_secrets or {}
            setup_task = models.SetupTask(
                command_line=setup_task_cmd,
                environment_variables=[
                    models.EnvironmentVariable(name=k, value=v)
                    for k, v in env.items()
                ],
                secrets=[
                    models.EnvironmentVariableWithSecretValue(name=k, value=v)
                    for k, v in secrets.items()
                ],
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    Helpers.AZURE_FILES_MOUNTING_PATH))
        client.workspaces.create(resource_group,
                                 Helpers.DEFAULT_WORKSPACE_NAME,
                                 location).result()
        # Azure file share mounted on every node of the cluster.
        file_share = models.AzureFileShareReference(
            azure_file_url='https://{0}.file.core.windows.net/{1}'.format(
                storage_account, Helpers.AZURE_FILES_NAME),
            relative_mount_path=Helpers.AZURE_FILES_MOUNTING_PATH,
            account_name=storage_account,
            credentials=models.AzureStorageCredentialsInfo(
                account_key=storage_account_key),
        )
        return client.clusters.create(
            resource_group,
            Helpers.DEFAULT_WORKSPACE_NAME,
            cluster_name,
            parameters=models.ClusterCreateParameters(
                vm_size=vm_size,
                scale_settings=models.ScaleSettings(
                    manual=models.ManualScaleSettings(
                        target_node_count=target_nodes)),
                node_setup=models.NodeSetup(
                    mount_volumes=models.MountVolumes(
                        azure_file_shares=[file_share],
                        file_servers=file_servers,
                        unmanaged_file_systems=file_systems),
                    setup_task=setup_task),
                subnet=subnet_id,
                user_account_settings=models.UserAccountSettings(
                    admin_user_name=Helpers.ADMIN_USER_NAME,
                    admin_user_password=Helpers.ADMIN_USER_PASSWORD),
                vm_priority='lowpriority')).result()
Esempio n. 8
0
# File service client built from the storage account name and key.
filesystem = FileService(storage_account_name, storage_account_key)

# For each listed file, pass the local path z:/script/<name> to the share's
# "data" directory under the same name (presumably an upload, going by
# create_file_from_path -- confirm against the FileService API).
for f in ['Train-28x28_cntk_text.txt', 'Test-28x28_cntk_text.txt', 'ConvNet_MNIST.py']:
  filesystem.create_file_from_path(fileshare, "data", f, "z:/script/"+f)

## Create Cluster

# cluster_name is not used in the visible part of the script;
# relative_mount_point is passed as the file share's relative_mount_path below.
cluster_name = 'shwarscluster'
relative_mount_point = 'azurefileshare'

parameters = models.ClusterCreateParameters(
    location='northeurope',
    vm_size='STANDARD_NC6',
    user_account_settings=models.UserAccountSettings(
         admin_user_name="shwars",
         admin_user_password="******"),
    scale_settings=models.ScaleSettings(
         manual=models.ManualScaleSettings(target_node_count=1)
     ),
    node_setup=models.NodeSetup(
        # Mount shared volumes to the host
         mount_volumes=models.MountVolumes(
             azure_file_shares=[
                 models.AzureFileShareReference(
                     account_name=storage_account_name,
                     credentials=models.AzureStorageCredentialsInfo(
         account_key=storage_account_key),
         azure_file_url='https://{0}.file.core.windows.net/{1}'.format(
               storage_account_name, fileshare),
                  relative_mount_path = relative_mount_point)],