Code example #1
    def test_setup_task_execution(self, resource_group, location, storage_account, storage_account_key):
        """Tests setup task execution.
        """
        cluster = Helpers.create_cluster(
            self.client, location, resource_group.name, self.cluster_name, 'STANDARD_D1', 1,
            storage_account.name, storage_account_key,
            setup_task_cmd='echo $GREETING $SECRET_GREETING',
            setup_task_env={'GREETING': 'setup task'},
            setup_task_secrets={'SECRET_GREETING': 'has a secret'})  # type: models.Cluster

        # Verify that the cluster is reported in the list of clusters
        Helpers.assert_existing_clusters_are(self, self.client, resource_group.name, [self.cluster_name])

        # Verify that one node is allocated and becomes available
        self.assertEqual(
            Helpers.wait_for_nodes(self.is_live, self.client, resource_group.name, self.cluster_name, 1,
                                   Helpers.NODE_STARTUP_TIMEOUT_SEC), 1)

        # Check that the server doesn't return values for secrets
        self.assertEqual(len(cluster.node_setup.setup_task.secrets), 1)
        self.assertEqual(cluster.node_setup.setup_task.secrets[0].name, 'SECRET_GREETING')
        self.assertIsNone(cluster.node_setup.setup_task.secrets[0].value)
        # Verify that the setup task completed by checking its generated output. BatchAI reports an auto-generated
        # path where the setup task's output logs are stored.
        setup_task_output_path = cluster.node_setup.setup_task.std_out_err_path_suffix
        nodes = Helpers.get_node_ids(self.client, resource_group.name, self.cluster_name)
        self.assertEqual(len(nodes), 1)
        node_id = nodes[0]
        Helpers.assert_file_in_file_share(self, storage_account.name, storage_account_key,
                                          setup_task_output_path,
                                          'stdout-{0}.txt'.format(node_id),
                                          u'setup task has a secret\n')
        Helpers.assert_file_in_file_share(self, storage_account.name, storage_account_key,
                                          setup_task_output_path, 'stderr-{0}.txt'.format(node_id), u'')
        self.client.clusters.delete(resource_group.name, Helpers.DEFAULT_WORKSPACE_NAME, self.cluster_name).result()
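
These examples assume a pre-configured self.client. As a point of reference, a minimal sketch of constructing the management client they rely on, assuming azure-mgmt-batchai and service principal credentials are available; every literal value below is a placeholder, not taken from the tests:

    # A sketch only: all credential values are placeholders.
    from azure.common.credentials import ServicePrincipalCredentials
    from azure.mgmt.batchai import BatchAIManagementClient

    credentials = ServicePrincipalCredentials(
        client_id='<client-id>',      # placeholder
        secret='<client-secret>',     # placeholder
        tenant='<tenant-id>')         # placeholder
    client = BatchAIManagementClient(credentials, '<subscription-id>')
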
Code example #2
    def test_creation_and_deletion(self, resource_group, location, storage_account, storage_account_key):
        """Tests basic use-case scenario.

        1. Create cluster
        2. Execute a task on the host
        3. Execute a task in a docker container
        4. Delete cluster
        """
        cluster = Helpers.create_cluster(
            self.client, location, resource_group.name, self.cluster_name, 'STANDARD_D1', 1,
            storage_account.name, storage_account_key)

        self.assertEqual(cluster.name, self.cluster_name)
        self.assertIsNone(cluster.errors)
        self.assertEqual(cluster.vm_size, 'STANDARD_D1')

        # Verify that the cluster is reported in the list of clusters
        Helpers.assert_existing_clusters_are(self, self.client, resource_group.name, [self.cluster_name])

        # Verify that one node is allocated and becomes available
        self.assertEqual(
            Helpers.wait_for_nodes(self.is_live, self.client, resource_group.name, self.cluster_name, 1,
                                   Helpers.NODE_STARTUP_TIMEOUT_SEC), 1)
        Helpers.assert_remote_login_info_reported_for_nodes(self, self.client, resource_group.name,
                                                            self.cluster_name, 1)

        # Verify that the cluster is able to run tasks.
        self.assertCanRunJobOnHost(resource_group, location, cluster.id)
        self.assertCanRunJobInContainer(resource_group, location, cluster.id)

        # Test cluster deletion
        self.client.clusters.delete(resource_group.name, Helpers.DEFAULT_WORKSPACE_NAME, self.cluster_name).result()
        Helpers.assert_existing_clusters_are(self, self.client, resource_group.name, [])
Code example #3
    def test_cluster_resizing(self, resource_group, location, storage_account, storage_account_key):
        """Tests manual cluster resizing"""
        cluster = Helpers.create_cluster(
            self.client, location, resource_group.name, self.cluster_name, 'STANDARD_D1', 1,
            storage_account.name, storage_account_key)

        # Verify that one node is allocated and becomes available
        self.assertEqual(
            Helpers.wait_for_nodes(self.is_live, self.client, resource_group.name, self.cluster_name, 1,
                                   Helpers.NODE_STARTUP_TIMEOUT_SEC), 1)
        Helpers.assert_remote_login_info_reported_for_nodes(self, self.client, resource_group.name,
                                                            self.cluster_name, 1)

        self.assertCanResizeCluster(resource_group, 0)
        self.assertCanResizeCluster(resource_group, 1)

        # Verify that the cluster is able to run tasks after resizing.
        self.assertCanRunJobOnHost(resource_group, location, cluster.id)
        self.client.clusters.delete(resource_group.name, Helpers.DEFAULT_WORKSPACE_NAME, self.cluster_name).result()
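
The resize itself is hidden inside the assertCanResizeCluster helper. As a minimal sketch of what a manual resize could look like with the same client, assuming this SDK version exposes models.ManualScaleSettings with a target_node_count field (mirroring the ScaleSettings usage in the auto-scaling example below):

    # A sketch only: switch the cluster to a manual scale with the requested node count.
    # models.ManualScaleSettings / target_node_count are assumptions about this SDK version.
    self.client.clusters.update(
        resource_group.name, Helpers.DEFAULT_WORKSPACE_NAME, self.cluster_name,
        scale_settings=models.ScaleSettings(
            manual=models.ManualScaleSettings(target_node_count=0)))
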
Code example #4
    def test_auto_scaling(self, resource_group, location, storage_account, storage_account_key):
        """Tests auto-scaling"""
        # Create the cluster with no nodes.
        cluster = Helpers.create_cluster(
            self.client, location, resource_group.name, self.cluster_name, 'STANDARD_D1', 0,
            storage_account.name, storage_account_key)

        # Switch the cluster into auto-scale mode
        self.client.clusters.update(resource_group.name, Helpers.DEFAULT_WORKSPACE_NAME, self.cluster_name,
                                    scale_settings=models.ScaleSettings(
                                        auto_scale=models.AutoScaleSettings(
                                            minimum_node_count=0,
                                            maximum_node_count=1)))

        # Submit a task. BatchAI must increase the number of nodes to execute the task.
        self.assertCanRunJobOnHost(resource_group, location, cluster.id, timeout_sec=Helpers.AUTO_SCALE_TIMEOUT_SEC)

        # Verify that the cluster is downsized to zero since there are no more jobs for it
        self.assertEqual(
            Helpers.wait_for_nodes(self.is_live, self.client, resource_group.name, self.cluster_name, 0,
                                   Helpers.NODE_STARTUP_TIMEOUT_SEC), 0)
        self.client.clusters.delete(resource_group.name, Helpers.DEFAULT_WORKSPACE_NAME, self.cluster_name).result()
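
Before the delete above, the scaling outcome could also be inspected directly by re-reading the cluster; a minimal sketch, assuming the Cluster model exposes allocation_state and current_node_count in this SDK version:

    # A sketch only: re-fetch the cluster and check how many nodes it currently has.
    cluster = self.client.clusters.get(
        resource_group.name, Helpers.DEFAULT_WORKSPACE_NAME, self.cluster_name)
    print(cluster.allocation_state, cluster.current_node_count)  # 0 expected after the downscale
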
Code example #5
    def test_file_server(self, resource_group, location, storage_account,
                         storage_account_key):
        """Tests file server functionality

        1. Create file server
        2. Create two clusters with this file server
        3. Check that the file server is mounted:
            a. submit tasks (one from the host and another from a container) on the first cluster to write data to the NFS share
            b. submit a task on the second cluster to read the data from the NFS share
        """
        server = Helpers.create_file_server(
            self.client, location, resource_group.name,
            self.file_server_name)  # type: models.FileServer

        cluster1 = Helpers.create_cluster(
            self.client,
            location,
            resource_group.name,
            'cluster1',
            'STANDARD_D1',
            1,
            storage_account.name,
            storage_account_key,
            file_servers=[
                models.FileServerReference(
                    file_server=models.ResourceId(id=server.id),
                    relative_mount_path='nfs',
                    mount_options="rw")
            ])
        cluster2 = Helpers.create_cluster(
            self.client,
            location,
            resource_group.name,
            'cluster2',
            'STANDARD_D1',
            1,
            storage_account.name,
            storage_account_key,
            file_servers=[
                models.FileServerReference(
                    file_server=models.ResourceId(id=server.id),
                    relative_mount_path='nfs',
                    mount_options="rw")
            ])
        # Verify that the file server is reported in the list of file servers.
        Helpers.assert_existing_file_servers_are(self, self.client,
                                                 resource_group.name,
                                                 [self.file_server_name])

        # Verify that the file server becomes available within a reasonable time
        self.assertTrue(
            Helpers.wait_for_file_server(self.is_live, self.client,
                                         resource_group.name,
                                         self.file_server_name,
                                         _FILE_SERVER_CREATION_TIMEOUT_SEC))

        # Verify that the file server's public and internal IP addresses are reported
        server = self.client.file_servers.get(resource_group.name,
                                              Helpers.DEFAULT_WORKSPACE_NAME,
                                              self.file_server_name)
        self.assertRegexpMatches(server.mount_settings.file_server_public_ip,
                                 Helpers.RE_ID_ADDRESS)
        self.assertRegexpMatches(server.mount_settings.file_server_internal_ip,
                                 Helpers.RE_ID_ADDRESS)

        # Verify the clusters allocated nodes successfully
        self.assertEqual(
            Helpers.wait_for_nodes(self.is_live, self.client,
                                   resource_group.name, 'cluster1', 1,
                                   Helpers.NODE_STARTUP_TIMEOUT_SEC), 1)
        self.assertEqual(
            Helpers.wait_for_nodes(self.is_live, self.client,
                                   resource_group.name, 'cluster2', 1,
                                   Helpers.NODE_STARTUP_TIMEOUT_SEC), 1)

        # Execute publishing tasks on the first cluster
        job1 = Helpers.create_custom_job(
            self.client, resource_group.name, cluster1.id, 'host_publisher', 1,
            'echo hi from host > $AZ_BATCHAI_MOUNT_ROOT/nfs/host.txt')
        self.assertEqual(
            Helpers.wait_for_job_completion(self.is_live, self.client,
                                            resource_group.name, job1.name,
                                            Helpers.MINUTE),
            models.ExecutionState.succeeded)
        job2 = Helpers.create_custom_job(
            self.client,
            resource_group.name,
            cluster1.id,
            'container_publisher',
            1,
            'echo hi from container >> $AZ_BATCHAI_MOUNT_ROOT/nfs/container.txt',
            container=models.ContainerSettings(
                image_source_registry=models.ImageSourceRegistry(
                    image="ubuntu")))
        self.assertEqual(
            Helpers.wait_for_job_completion(self.is_live, self.client,
                                            resource_group.name, job2.name,
                                            Helpers.MINUTE),
            models.ExecutionState.succeeded)

        # Execute consumer task on the second cluster
        job3 = Helpers.create_custom_job(
            self.client, resource_group.name, cluster2.id, 'consumer', 1,
            'cat $AZ_BATCHAI_MOUNT_ROOT/nfs/host.txt; '
            'cat $AZ_BATCHAI_MOUNT_ROOT/nfs/container.txt')
        self.assertEqual(
            Helpers.wait_for_job_completion(self.is_live, self.client,
                                            resource_group.name, job3.name,
                                            Helpers.MINUTE),
            models.ExecutionState.succeeded)

        # Verify the data
        Helpers.assert_job_files_are(
            self, self.client, resource_group.name, job3.name,
            Helpers.STANDARD_OUTPUT_DIRECTORY_ID, {
                u'stdout.txt': u'hi from host\nhi from container\n',
                u'stderr.txt': ''
            })

        # Delete clusters
        self.client.clusters.delete(resource_group.name,
                                    Helpers.DEFAULT_WORKSPACE_NAME,
                                    'cluster1').result()
        self.client.clusters.delete(resource_group.name,
                                    Helpers.DEFAULT_WORKSPACE_NAME,
                                    'cluster2').result()

        # Test file server deletion
        self.client.file_servers.delete(resource_group.name,
                                        Helpers.DEFAULT_WORKSPACE_NAME,
                                        self.file_server_name).result()
        Helpers.assert_existing_file_servers_are(self, self.client,
                                                 resource_group.name, [])