Example #1
    def create_cluster(self,
                       cluster_conf: models.ClusterConfiguration,
                       wait: bool = False):
        try:
            zip_resource_files = upload_node_scripts.zip_scripts(
                self.blob_client, cluster_conf.cluster_id,
                cluster_conf.custom_scripts, cluster_conf.spark_configuration,
                cluster_conf.user_configuration)

            start_task = create_cluster_helper.generate_cluster_start_task(
                self, zip_resource_files, cluster_conf.gpu_enabled,
                cluster_conf.docker_repo, cluster_conf.file_shares)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            cluster = self.__create_pool_and_job(cluster_conf,
                                                 software_metadata_key,
                                                 start_task, vm_image)

            # Wait for the master to be ready
            if wait:
                util.wait_for_master_to_be_ready(self, cluster.id)
                cluster = self.get_cluster(cluster.id)

            return cluster

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
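For context, here is a minimal sketch of how a method like this might be invoked. The client construction and configuration fields shown are assumptions for illustration (an aztk.spark.Client built from an already-populated SecretsConfiguration), not something the example above spells out:

# Hypothetical usage sketch; `secrets_config` is an aztk SecretsConfiguration
# you have already populated.
import aztk.spark

client = aztk.spark.Client(secrets_config)
cluster_conf = aztk.spark.models.ClusterConfiguration(
    cluster_id="my-spark-cluster",      # assumed field names, for illustration
    vm_count=3,
    vm_size="standard_d2_v2")
cluster = client.create_cluster(cluster_conf, wait=True)  # block until the master is ready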
Example #2
    def submit_job(self, job_configuration):
        try:
            cluster_data = self._get_cluster_data(job_configuration.id)
            node_data = NodeData(
                job_configuration.as_cluster_config()).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(
                node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self,
                zip_resource_files,
                job_configuration.gpu_enabled,
                job_configuration.docker_repo,
                worker_on_master=job_configuration.worker_on_master)

            application_tasks = []
            for application in job_configuration.applications:
                application_tasks.append((application,
                                          cluster_submit_helper.generate_task(
                                              self, job_configuration.id,
                                              application)))

            job_manager_task = job_submit_helper.generate_task(
                self, job_configuration, application_tasks)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            if job_configuration.max_dedicated_nodes and not job_configuration.max_low_pri_nodes:
                autoscale_formula = "maxNumberofVMs = {0}; targetNumberofVMs = {1};" \
                                    " $TargetDedicatedNodes=min(maxNumberofVMs, targetNumberofVMs)".format(
                                        job_configuration.max_dedicated_nodes, job_configuration.max_dedicated_nodes)
            elif job_configuration.max_low_pri_nodes and not job_configuration.max_dedicated_nodes:
                autoscale_formula = "maxNumberofVMs = {0}; targetNumberofVMs = {1};" \
                                    " $TargetLowPriorityNodes=min(maxNumberofVMs, targetNumberofVMs)".format(
                                        job_configuration.max_low_pri_nodes, job_configuration.max_low_pri_nodes)
            else:
                raise error.AztkError("Jobs do not support both dedicated and low priority nodes." \
                                      " JobConfiguration fields max_dedicated_nodes and max_low_pri_nodes are mutually exclusive values.")

            job = self.__submit_job(
                job_configuration=job_configuration,
                start_task=start_task,
                job_manager_task=job_manager_task,
                autoscale_formula=autoscale_formula,
                software_metadata_key=software_metadata_key,
                vm_image_model=vm_image,
                application_metadata='\n'.join(
                    application.name
                    for application in (job_configuration.applications or [])))

            return models.Job(job)

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
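To make the branching above concrete, this is the formula string the dedicated-only branch produces (a standalone snippet reproducing the format call, not library code):

# Illustrative: the dedicated-only branch with max_dedicated_nodes = 4.
max_dedicated_nodes = 4
autoscale_formula = "maxNumberofVMs = {0}; targetNumberofVMs = {1};" \
                    " $TargetDedicatedNodes=min(maxNumberofVMs, targetNumberofVMs)".format(
                        max_dedicated_nodes, max_dedicated_nodes)
print(autoscale_formula)
# maxNumberofVMs = 4; targetNumberofVMs = 4; $TargetDedicatedNodes=min(maxNumberofVMs, targetNumberofVMs)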
Example #3
    def submit_job(self, job_configuration: models.JobConfiguration):
        try:
            job_configuration = _apply_default_for_job_config(
                job_configuration)
            job_configuration.validate()
            cluster_data = self._get_cluster_data(job_configuration.id)
            node_data = NodeData(
                job_configuration.to_cluster_config()).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(
                node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self,
                zip_resource_files,
                job_configuration.id,
                job_configuration.gpu_enabled,
                job_configuration.get_docker_repo(),
                mixed_mode=job_configuration.mixed_mode(),
                worker_on_master=job_configuration.worker_on_master)

            application_tasks = []
            for application in job_configuration.applications:
                application_tasks.append((application,
                                          cluster_submit_helper.generate_task(
                                              self, job_configuration.id,
                                              application)))

            job_manager_task = job_submit_helper.generate_task(
                self, job_configuration, application_tasks)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            autoscale_formula = "$TargetDedicatedNodes = {0}; " \
                                "$TargetLowPriorityNodes = {1}".format(
                                    job_configuration.max_dedicated_nodes,
                                    job_configuration.max_low_pri_nodes)

            job = self.__submit_job(
                job_configuration=job_configuration,
                start_task=start_task,
                job_manager_task=job_manager_task,
                autoscale_formula=autoscale_formula,
                software_metadata_key=software_metadata_key,
                vm_image_model=vm_image,
                application_metadata='\n'.join(
                    application.name
                    for application in (job_configuration.applications or [])))

            return models.Job(job)

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
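Unlike Example #2, this version allows mixed mode, so the formula unconditionally sets both node targets instead of choosing a branch. For two dedicated and eight low-priority nodes the string comes out as (standalone illustration):

# Illustrative: the mixed-mode formula for 2 dedicated / 8 low-priority nodes.
autoscale_formula = "$TargetDedicatedNodes = {0}; " \
                    "$TargetLowPriorityNodes = {1}".format(2, 8)
print(autoscale_formula)  # $TargetDedicatedNodes = 2; $TargetLowPriorityNodes = 8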
Example #4
def submit_job(core_job_operations,
               spark_job_operations,
               job_configuration: models.JobConfiguration,
               wait: bool = False):
    try:
        job_configuration = _apply_default_for_job_config(job_configuration)
        job_configuration.validate()
        cluster_data = core_job_operations.get_cluster_data(job_configuration.id)
        node_data = NodeData(job_configuration.to_cluster_config()).add_core().done()
        zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()

        start_task = spark_job_operations._generate_cluster_start_task(
            core_job_operations,
            zip_resource_files,
            job_configuration.id,
            job_configuration.gpu_enabled,
            job_configuration.get_docker_repo(),
            job_configuration.get_docker_run_options(),
            mixed_mode=job_configuration.mixed_mode(),
            worker_on_master=job_configuration.worker_on_master,
        )

        application_tasks = []
        for application in job_configuration.applications:
            application_tasks.append((
                application,
                spark_job_operations._generate_application_task(core_job_operations, job_configuration.id, application),
            ))

        job_manager_task = generate_job_manager_task(core_job_operations, job_configuration, application_tasks)

        software_metadata_key = base_models.Software.spark

        vm_image = models.VmImage(publisher="Canonical", offer="UbuntuServer", sku="16.04")

        autoscale_formula = "$TargetDedicatedNodes = {0}; " "$TargetLowPriorityNodes = {1}".format(
            job_configuration.max_dedicated_nodes, job_configuration.max_low_pri_nodes)

        job = core_job_operations.submit(
            job_configuration=job_configuration,
            start_task=start_task,
            job_manager_task=job_manager_task,
            autoscale_formula=autoscale_formula,
            software_metadata_key=software_metadata_key,
            vm_image_model=vm_image,
            application_metadata="\n".join(application.name for application in (job_configuration.applications or [])),
        )

        if wait:
            spark_job_operations.wait(id=job_configuration.id)

        return models.Job(job)

    except batch_error.BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
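This refactored version threads explicit operations objects through instead of `self`, and adds the `wait` flag. User code would normally reach it through the public client; the exact entry point below is an assumption for illustration:

# Hypothetical call path: the public client fills in the operations objects.
client = aztk.spark.Client(secrets_config)             # assumed setup, as in Example #1's sketch
job = client.job.submit(job_configuration, wait=True)  # blocks until the job completes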
Example #5
    def create(self, cluster_configuration: models.ClusterConfiguration, vm_ver: str, wait: bool = False):
        """Create a cluster.

        Args:
            cluster_configuration (:obj:`ClusterConfiguration`): Configuration for the cluster to be created.
            vm_ver (:obj:`str`): Ubuntu VM image version (SKU), e.g. "16.04".
            wait (:obj:`bool`): If True, this function blocks until cluster creation is finished.

        Returns:
            :obj:`aztk.spark.models.Cluster`: A Cluster object representing the state and configuration of the cluster.
        """

        vm_image = models.VmImage(publisher="Canonical", offer="UbuntuServer", sku=vm_ver)
        return create.create_cluster(self._core_cluster_operations, self, cluster_configuration, vm_image, wait)
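A usage sketch for this variant, which surfaces the Ubuntu SKU as a parameter (the `client.cluster` access path is an assumption; the signature matches the method above):

# Illustrative: choose the Ubuntu SKU at call time.
cluster = client.cluster.create(cluster_configuration, vm_ver="18.04", wait=True)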
Example #6
    def create_cluster(self, configuration: models.ClusterConfiguration, wait: bool = False):
        """
        Create a new aztk spark cluster

        Args:
            configuration(aztk.spark.models.ClusterConfiguration): Configuration for the cluster to be created
            wait(bool): If True, block until the cluster is ready before returning

        Returns:
            aztk.spark.models.Cluster
        """
        cluster_conf = models.ClusterConfiguration()
        cluster_conf.merge(DEFAULT_CLUSTER_CONFIG)
        cluster_conf.merge(configuration)
        cluster_conf.validate()
        cluster_data = self._get_cluster_data(cluster_conf.cluster_id)
        try:
            node_data = NodeData(cluster_conf).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(self,
                                                                           zip_resource_files,
                                                                           cluster_conf.cluster_id,
                                                                           cluster_conf.gpu_enabled(),
                                                                           cluster_conf.get_docker_repo(),
                                                                           cluster_conf.file_shares,
                                                                           cluster_conf.plugins,
                                                                           cluster_conf.mixed_mode(),
                                                                           cluster_conf.worker_on_master)

            software_metadata_key = "spark"

            vm_image = models.VmImage(
                publisher='Canonical',
                offer='UbuntuServer',
                sku='16.04')

            cluster = self.__create_pool_and_job(
                cluster_conf, software_metadata_key, start_task, vm_image)

            # Wait for the master to be ready
            if wait:
                util.wait_for_master_to_be_ready(self, cluster.id)
                cluster = self.get_cluster(cluster.id)

            return cluster

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
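The first four lines of the method implement a merge-defaults pattern: start from an empty configuration, fold in the library defaults, then overlay the caller's settings so that user-supplied values win. The same precedence, sketched with plain dicts (illustrative only, not the aztk implementation):

# Illustrative: later merges override earlier ones, exactly as with merge() above.
DEFAULTS = {"vm_size": "standard_d2_v2", "docker_repo": "aztk/spark"}  # hypothetical defaults
user = {"vm_size": "standard_f4"}
effective = {**DEFAULTS, **user}
assert effective["vm_size"] == "standard_f4"      # the user's value wins
assert effective["docker_repo"] == "aztk/spark"   # defaults fill the gaps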
Example #7
    def submit(self, job_configuration: models.JobConfiguration, vm_os_ver: str, wait: bool = False):
        """Submit a job

        Jobs are a cluster definition and one or more application definitions that run on the cluster. The job's
        cluster will be allocated and configured, then the applications will be executed with their output stored
        in Azure Storage. When all applications have completed, the cluster will be automatically deleted.

        Args:
            job_configuration (:obj:`aztk.spark.models.JobConfiguration`): Model defining the job's configuration.
            vm_os_ver (:obj:`str`): Ubuntu VM OS version; possible values: "16.04" or "18.04".
            wait (:obj:`bool`): If True, blocks until job is completed. Defaults to False.

        Returns:
            :obj:`aztk.spark.models.Job`: Model representing the state of the job.
        """
        vm_image = models.VmImage(publisher="Canonical", offer="UbuntuServer", sku=vm_os_ver)
        return submit.submit_job(self._core_job_operations, self, job_configuration, vm_image, wait)
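A sketch of assembling a job and submitting it through this method. The JobConfiguration and ApplicationConfiguration field names are assumptions based on aztk's models, not taken from the example above:

# Illustrative: one application, run on Ubuntu 18.04.
app = models.ApplicationConfiguration(
    name="pi",
    application="/path/to/pi.py",   # hypothetical script path
    application_args=["100"])
job_conf = models.JobConfiguration(
    id="spark-pi-job",
    applications=[app],
    vm_size="standard_d2_v2",       # assumed fields, for illustration
    max_dedicated_nodes=3)
job = client.job.submit(job_conf, vm_os_ver="18.04", wait=False)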
Example #8
    def create_cluster(self,
                       cluster_conf: models.ClusterConfiguration,
                       wait: bool = False):
        cluster_conf.validate()
        cluster_data = self._get_cluster_data(cluster_conf.cluster_id)
        try:
            node_data = NodeData(cluster_conf).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(
                node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self, zip_resource_files, cluster_conf.gpu_enabled(),
                cluster_conf.docker_repo,
                cluster_conf.file_shares, cluster_conf.plugins,
                cluster_conf.mixed_mode(), cluster_conf.worker_on_master)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            cluster = self.__create_pool_and_job(cluster_conf,
                                                 software_metadata_key,
                                                 start_task, vm_image)

            # Wait for the master to be ready
            if wait:
                util.wait_for_master_to_be_ready(self, cluster.id)
                cluster = self.get_cluster(cluster.id)

            return cluster

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
Example #9
from aztk.spark import models

SPARK_VM_IMAGE = models.VmImage(publisher='Canonical',
                                offer='UbuntuServer',
                                sku='16.04')
Example #10
from aztk.spark import models

SPARK_VM_IMAGE = models.VmImage(publisher="Canonical",
                                offer="UbuntuServer",
                                sku="16.04")