def create_file_server(client, location, resource_group, nfs_name, subnet_id=None):
    """Creates NFS

    :param BatchAIManagementClient client: client instance.
    :param str location: location.
    :param str resource_group: resource group name.
    :param str nfs_name: file server name.
    :param models.ResourceId subnet_id: id of the subnet.
    :return models.FileServer: created file server.
    """
    # Make sure the default workspace exists before placing the file server in it.
    client.workspaces.create(resource_group, Helpers.DEFAULT_WORKSPACE_NAME, location).result()
    ssh_config = models.SshConfiguration(
        user_account_settings=models.UserAccountSettings(
            admin_user_name=Helpers.ADMIN_USER_NAME,
            admin_user_password=Helpers.ADMIN_USER_PASSWORD))
    disks = models.DataDisks(
        disk_size_in_gb=10,
        disk_count=2,
        storage_account_type='Standard_LRS')
    creation = client.file_servers.create(
        resource_group,
        Helpers.DEFAULT_WORKSPACE_NAME,
        nfs_name,
        models.FileServerCreateParameters(
            vm_size='STANDARD_D1',
            ssh_configuration=ssh_config,
            data_disks=disks,
            subnet=subnet_id))
    # Block until the long-running create operation completes.
    return creation.result()
def prepare_batch_ai_workspace(client, service, config):
    """Create the Batch AI workspace and a GPU cluster inside it.

    :param BatchAIManagementClient client: management client instance.
    :param service: file service; its ``primary_endpoint`` is used to build the
        Azure file share URL.
    :param config: configuration object carrying workspace, cluster and storage
        settings (resource group, location, VM size, node count, credentials).
    """
    # Create Batch AI workspace. Wait for completion with .result() so the
    # cluster creation below cannot race against a not-yet-existing workspace
    # (the sibling helpers in this file wait on workspace creation as well).
    client.workspaces.create(config.workspace_resource_group, config.workspace,
                             config.location).result()
    # Create GPU cluster
    parameters = models.ClusterCreateParameters(
        # VM size. Use N-series for GPU
        vm_size=config.workspace_vm_size,
        # Configure the ssh users
        user_account_settings=models.UserAccountSettings(
            admin_user_name=config.admin,
            admin_user_password=config.admin_password),
        # Number of VMs in the cluster
        scale_settings=models.ScaleSettings(
            manual=models.ManualScaleSettings(target_node_count=config.workspace_node_count)
        ),
        # Configure each node in the cluster
        node_setup=models.NodeSetup(
            # Mount shared volumes to the host
            mount_volumes=models.MountVolumes(
                azure_file_shares=[
                    models.AzureFileShareReference(
                        account_name=config.storage_account_name,
                        credentials=models.AzureStorageCredentialsInfo(
                            account_key=config.storage_account_key),
                        azure_file_url='https://{0}/{1}'.format(
                            service.primary_endpoint, config.workspace_file_share),
                        relative_mount_path=config.workspace_relative_mount_path)],
            ),
        ),
    )
    client.clusters.create(config.workspace_resource_group, config.workspace,
                           config.workspace_cluster, parameters).result()
def test_experiments_isolation(self, resource_group, location):
    """Deleting one experiment must not affect sibling experiments, jobs or clusters."""
    workspaces = ('first', 'second')
    experiments = ('exp1', 'exp2')
    for ws in workspaces:
        self.client.workspaces.create(resource_group.name, ws, location).result()
    # Create a cluster, two experiments and a job in each experiment
    for ws in workspaces:
        cluster_params = models.ClusterCreateParameters(
            vm_size='STANDARD_D1',
            scale_settings=models.ScaleSettings(
                manual=models.ManualScaleSettings(target_node_count=0)),
            user_account_settings=models.UserAccountSettings(
                admin_user_name=helpers.ADMIN_USER_NAME,
                admin_user_password=helpers.ADMIN_USER_PASSWORD),
            vm_priority='lowpriority')
        cluster = self.client.clusters.create(
            resource_group.name, ws, 'cluster', parameters=cluster_params).result()
        for exp in experiments:
            self.client.experiments.create(resource_group.name, ws, exp).result()
            job_params = models.JobCreateParameters(
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT',
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line='true'))
            self.client.jobs.create(
                resource_group.name, ws, exp, 'job', parameters=job_params).result()
    # Delete exp1 in the first workspace
    self.client.experiments.delete(resource_group.name, 'first', 'exp1').result()
    # Ensure the experiment was actually deleted
    self.assertRaises(
        CloudError,
        lambda: self.client.experiments.get(resource_group.name, 'first', 'exp1'))
    for ws in workspaces:
        # Ensure the clusters are not affected
        self.client.clusters.get(resource_group.name, ws, 'cluster')
        # Ensure the other experiments are not affected
        for exp in experiments:
            if (ws, exp) == ('first', 'exp1'):
                continue
            self.client.experiments.get(resource_group.name, ws, exp)
            job = self.client.jobs.get(resource_group.name, ws, exp, 'job')
            # And check the jobs are not terminated
            self.assertEqual(job.execution_state, models.ExecutionState.queued)
def cluster_parameters_for(config, container_settings, volumes):
    """Build ClusterCreateParameters from *config* with the given mount *volumes*.

    Note: ``container_settings`` is accepted for interface compatibility but is
    not referenced by this builder.
    """
    # Fixed Ubuntu 16.04 image used for all cluster nodes.
    image = models.ImageReference(
        offer='UbuntuServer',
        publisher='Canonical',
        sku='16.04-LTS',
        version='16.04.201708151')
    accounts = models.UserAccountSettings(
        admin_user_name=config.admin_user['name'],
        admin_user_password=config.admin_user['password'])
    scaling = models.ScaleSettings(
        manual=models.ManualScaleSettings(target_node_count=config.node_count))
    return models.ClusterCreateParameters(
        virtual_machine_configuration=models.VirtualMachineConfiguration(
            image_reference=image),
        location=config.location,
        vm_size=config.vm_type,
        user_account_settings=accounts,
        scale_settings=scaling,
        node_setup=models.NodeSetup(mount_volumes=volumes))
def _update_user_account_settings(params, admin_user_name, ssh_key, password):
    """Update account settings of cluster or file server creation parameters

    :param models.ClusterCreateParameters or models.FileServerCreateParameters params: params to update
    :param str or None admin_user_name: name of admin user to create.
    :param str or None ssh_key: ssh public key value or path to the file containing the key.
    :param str or None password: password.
    :return models.ClusterCreateParameters: updated parameters.
    """
    # Work on a copy so the caller's parameters object is never mutated.
    result = copy.deepcopy(params)
    # Cluster parameters carry user_account_settings directly; file server
    # parameters nest them under ssh_configuration.
    if hasattr(result, 'user_account_settings'):
        parent = result
    else:
        if result.ssh_configuration is None:
            result.ssh_configuration = models.SshConfiguration(user_account_settings=None)
        parent = result.ssh_configuration
    if parent.user_account_settings is None:
        parent.user_account_settings = models.UserAccountSettings()
    # Get effective user name, password and key trying them in the following order: provided via command line,
    # provided in the config file, current user name and his default public ssh key.
    effective_user_name = admin_user_name or parent.user_account_settings.admin_user_name or getpass.getuser()
    effective_password = password or parent.user_account_settings.admin_user_password
    # Use default ssh public key only if no password is configured.
    effective_key = (ssh_key or parent.user_account_settings.admin_user_ssh_public_key or
                     (None if effective_password else _get_default_ssh_public_key_location()))
    # If the key value is a path to an existing file, read the key from it.
    if effective_key:
        if os.path.exists(os.path.expanduser(effective_key)):
            with open(os.path.expanduser(effective_key)) as f:
                effective_key = f.read()
    # Any exception raised by the validator is reported as an invalid key.
    try:
        if effective_key and not keys.is_valid_ssh_rsa_public_key(effective_key):
            raise CLIError('Incorrect ssh public key value.')
    except Exception:
        raise CLIError('Incorrect ssh public key value.')
    parent.user_account_settings.admin_user_name = effective_user_name
    parent.user_account_settings.admin_user_ssh_public_key = effective_key
    parent.user_account_settings.admin_user_password = effective_password
    # At least a user name and one credential (password or key) are required.
    if not parent.user_account_settings.admin_user_name:
        raise CLIError('Please provide admin user name.')
    if (not parent.user_account_settings.admin_user_ssh_public_key and
            not parent.user_account_settings.admin_user_password):
        raise CLIError('Please provide admin user password or ssh key.')
    return result
def _update_user_account_settings(params, admin_user_name, ssh_key, password):
    """Update account settings of cluster or file server creation parameters

    :param models.ClusterCreateParameters or models.FileServerCreateParameters params: params to update
    :param str or None admin_user_name: name of admin user to create.
    :param str or None ssh_key: ssh public key value or path to the file containing the key.
    :param str or None password: password.
    :return models.ClusterCreateParameters: updated parameters.
    """
    # Work on a copy so the caller's parameters object is never mutated.
    result = copy.deepcopy(params)
    key = ssh_key
    # If ssh_key points to an existing file, read the key value from it.
    if ssh_key:
        if os.path.exists(os.path.expanduser(ssh_key)):
            with open(os.path.expanduser(ssh_key)) as f:
                key = f.read()
        if not is_valid_ssh_rsa_public_key(key):
            raise CLIError('Incorrect ssh public key value.')
    # Cluster parameters carry user_account_settings directly; file server
    # parameters nest them under ssh_configuration.
    if hasattr(result, 'user_account_settings'):
        parent = result
    else:
        if result.ssh_configuration is None:
            # Pass by keyword: msrest model constructors expect keyword
            # arguments, and this keeps the call consistent with the sibling
            # implementation of this helper.
            result.ssh_configuration = models.SshConfiguration(user_account_settings=None)
        parent = result.ssh_configuration
    if parent.user_account_settings is None:
        parent.user_account_settings = models.UserAccountSettings(
            admin_user_name=admin_user_name, admin_user_ssh_public_key=key)
    # Explicit command-line values override whatever the config provided.
    if admin_user_name:
        parent.user_account_settings.admin_user_name = admin_user_name
    if key:
        parent.user_account_settings.admin_user_ssh_public_key = key
    if password:
        parent.user_account_settings.admin_user_password = password
    # At least a user name and one credential (password or key) are required.
    if not parent.user_account_settings.admin_user_name:
        raise CLIError('Please provide admin user name.')
    if (not parent.user_account_settings.admin_user_ssh_public_key and
            not parent.user_account_settings.admin_user_password):
        raise CLIError('Please provide admin user password or ssh key.')
    return result
def create_cluster(client, location, resource_group, cluster_name, vm_size, target_nodes,
                   storage_account, storage_account_key,
                   file_servers=None, file_systems=None, subnet_id=None,
                   setup_task_cmd=None, setup_task_env=None, setup_task_secrets=None):
    """Creates a cluster with given parameters and mounted Azure Files

    :param BatchAIManagementClient client: client instance.
    :param str location: location.
    :param str resource_group: resource group name.
    :param str cluster_name: name of the cluster.
    :param str vm_size: vm size.
    :param int target_nodes: number of nodes.
    :param str storage_account: name of the storage account.
    :param str storage_account_key: storage account key.
    :param list(models.FileServerReference) file_servers: file servers.
    :param list(models.UnmanagedFileServerReference) file_systems: file systems.
    :param str setup_task_cmd: start task cmd line.
    :param dict[str, str] setup_task_env: environment variables for start task.
    :param dict[str, str] setup_task_secrets: environment variables with secret values for start task, server doesn't
                                              return values for these environment variables in get cluster responses.
    :param str subnet_id: virtual network subnet id.
    :return models.Cluster: the created cluster
    """
    Helpers._create_file_share(storage_account, storage_account_key)
    setup_task = None
    if setup_task_cmd:
        # setup_task_env / setup_task_secrets default to None; treat None as
        # "no variables" instead of crashing on None.items().
        setup_task = models.SetupTask(
            command_line=setup_task_cmd,
            environment_variables=[
                models.EnvironmentVariable(name=k, value=v)
                for k, v in (setup_task_env or {}).items()
            ],
            secrets=[
                models.EnvironmentVariableWithSecretValue(name=k, value=v)
                for k, v in (setup_task_secrets or {}).items()
            ],
            std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                Helpers.AZURE_FILES_MOUNTING_PATH))
    # Make sure the default workspace exists before creating the cluster in it.
    client.workspaces.create(resource_group, Helpers.DEFAULT_WORKSPACE_NAME, location).result()
    return client.clusters.create(
        resource_group,
        Helpers.DEFAULT_WORKSPACE_NAME,
        cluster_name,
        parameters=models.ClusterCreateParameters(
            vm_size=vm_size,
            scale_settings=models.ScaleSettings(
                manual=models.ManualScaleSettings(
                    target_node_count=target_nodes)),
            node_setup=models.NodeSetup(
                mount_volumes=models.MountVolumes(
                    azure_file_shares=[
                        models.AzureFileShareReference(
                            azure_file_url=
                            'https://{0}.file.core.windows.net/{1}'.format(
                                storage_account, Helpers.AZURE_FILES_NAME),
                            relative_mount_path=Helpers.AZURE_FILES_MOUNTING_PATH,
                            account_name=storage_account,
                            credentials=models.AzureStorageCredentialsInfo(
                                account_key=storage_account_key),
                        )
                    ],
                    file_servers=file_servers,
                    unmanaged_file_systems=file_systems),
                setup_task=setup_task),
            subnet=subnet_id,
            user_account_settings=models.UserAccountSettings(
                admin_user_name=Helpers.ADMIN_USER_NAME,
                admin_user_password=Helpers.ADMIN_USER_PASSWORD),
            vm_priority='lowpriority')).result()
filesystem = FileService(storage_account_name, storage_account_key) for f in ['Train-28x28_cntk_text.txt', 'Test-28x28_cntk_text.txt', 'ConvNet_MNIST.py']: filesystem.create_file_from_path(fileshare, "data", f, "z:/script/"+f) ## Create Cluster cluster_name = 'shwarscluster' relative_mount_point = 'azurefileshare' parameters = models.ClusterCreateParameters( location='northeurope', vm_size='STANDARD_NC6', user_account_settings=models.UserAccountSettings( admin_user_name="shwars", admin_user_password="******"), scale_settings=models.ScaleSettings( manual=models.ManualScaleSettings(target_node_count=1) ), node_setup=models.NodeSetup( # Mount shared volumes to the host mount_volumes=models.MountVolumes( azure_file_shares=[ models.AzureFileShareReference( account_name=storage_account_name, credentials=models.AzureStorageCredentialsInfo( account_key=storage_account_key), azure_file_url='https://{0}.file.core.windows.net/{1}'.format( storage_account_name, fileshare), relative_mount_path = relative_mount_point)],