Esempio n. 1
0
    def submit_job(self, job_configuration):
        """Submit a Spark job described by ``job_configuration``.

        Builds the cluster start task, one task per application and the job
        manager task, then submits them together with an autoscale formula
        derived from the configured node counts.

        Args:
            job_configuration: the job description. This method reads its
                ``id``, ``gpu_enabled``, ``docker_repo``, ``worker_on_master``,
                ``applications``, ``max_dedicated_nodes`` and
                ``max_low_pri_nodes`` attributes and calls
                ``as_cluster_config()`` on it. ``applications`` may be
                ``None`` or empty, in which case no application task is
                generated.

        Returns:
            models.Job: wrapper around the object returned by the submission.

        Raises:
            error.AztkError: if both or neither of ``max_dedicated_nodes`` and
                ``max_low_pri_nodes`` are set, or if a
                ``batch_error.BatchErrorException`` is raised along the way.
        """
        try:
            # Check the node counts before anything else so that an invalid
            # configuration is rejected before upload_node_data is called.
            dedicated_nodes = job_configuration.max_dedicated_nodes
            low_pri_nodes = job_configuration.max_low_pri_nodes
            if dedicated_nodes and low_pri_nodes:
                raise error.AztkError(
                    "Jobs do not support both dedicated and low priority nodes."
                    " JobConfiguration fields max_dedicated_nodes and max_low_pri_nodes are mutually exclusive values.")
            if not dedicated_nodes and not low_pri_nodes:
                # Previously this case was reported with the "both" message.
                raise error.AztkError(
                    "Jobs require at least one node."
                    " Set exactly one of the JobConfiguration fields max_dedicated_nodes or max_low_pri_nodes.")

            if dedicated_nodes:
                target_variable, node_count = "$TargetDedicatedNodes", dedicated_nodes
            else:
                target_variable, node_count = "$TargetLowPriorityNodes", low_pri_nodes
            # Same text as before: max and target are both the configured count.
            autoscale_formula = (
                "maxNumberofVMs = {0}; targetNumberofVMs = {0};"
                " {1}=min(maxNumberofVMs, targetNumberofVMs)").format(
                    node_count, target_variable)

            cluster_data = self._get_cluster_data(job_configuration.id)
            node_data = NodeData(
                job_configuration.as_cluster_config()).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(
                node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self,
                zip_resource_files,
                job_configuration.gpu_enabled,
                job_configuration.docker_repo,
                worker_on_master=job_configuration.worker_on_master)

            # Treat a missing application list as empty everywhere, matching
            # the guard already used when building the application metadata.
            applications = job_configuration.applications or []
            application_tasks = [
                (application,
                 cluster_submit_helper.generate_task(
                     self, job_configuration.id, application))
                for application in applications
            ]

            job_manager_task = job_submit_helper.generate_task(
                self, job_configuration, application_tasks)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            job = self.__submit_job(
                job_configuration=job_configuration,
                start_task=start_task,
                job_manager_task=job_manager_task,
                autoscale_formula=autoscale_formula,
                software_metadata_key=software_metadata_key,
                vm_image_model=vm_image,
                application_metadata='\n'.join(
                    application.name for application in applications))

            return models.Job(job)

        except batch_error.BatchErrorException as e:
            # Chain explicitly so the original Batch error stays attached.
            raise error.AztkError(helpers.format_batch_exception(e)) from e
Esempio n. 2
0
def create_cluster(core_cluster_operations,
                   spark_cluster_operations,
                   cluster_conf: models.ClusterConfiguration,
                   vm_image: base_models.VmImage,
                   wait: bool = False):
    """
    Create a new aztk spark cluster

    Defaults are applied to the configuration and it is validated before
    anything else happens.

    Args:
        core_cluster_operations: object providing ``get_cluster_data`` and
            ``create``; also handed to the start task generator.
        spark_cluster_operations: object providing
            ``_generate_cluster_start_task`` and ``get``.
        cluster_conf(aztk.spark.models.models.ClusterConfiguration): Configuration for the cluster to be created
        vm_image: VM image model forwarded to ``core_cluster_operations.create``
        wait(bool): If you should wait for the cluster to be ready before returning

    Returns:
        :obj:`aztk.spark.models.Cluster`

    Raises:
        error.AztkError: when a ``BatchErrorException`` is raised after the
            cluster data has been obtained.
    """
    cluster_conf = _apply_default_for_cluster_config(cluster_conf)
    cluster_conf.validate()

    cluster_data = core_cluster_operations.get_cluster_data(cluster_conf.cluster_id)
    try:
        prepared_node_data = NodeData(cluster_conf).add_core().done()
        uploaded = cluster_data.upload_node_data(prepared_node_data)
        zip_resource_files = uploaded.to_resource_file()

        start_task = spark_cluster_operations._generate_cluster_start_task(
            core_cluster_operations, zip_resource_files, cluster_conf.cluster_id,
            cluster_conf.gpu_enabled(), cluster_conf.get_docker_repo(),
            cluster_conf.get_docker_run_options(), cluster_conf.file_shares,
            cluster_conf.mixed_mode(), cluster_conf.worker_on_master)

        cluster = core_cluster_operations.create(
            cluster_conf, base_models.Software.spark, start_task, vm_image)

        if not wait:
            return cluster

        # Block until the master is ready, then return the refreshed cluster.
        util.wait_for_master_to_be_ready(
            core_cluster_operations, spark_cluster_operations, cluster.id)
        return spark_cluster_operations.get(cluster.id)

    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Esempio n. 3
0
    def submit_job(self, job_configuration: models.JobConfiguration):
        """Submit a Spark job described by ``job_configuration``.

        Defaults are applied to the configuration and it is validated first.
        The method then builds the cluster start task, one task per
        application and the job manager task, and submits them with an
        autoscale formula that sets ``$TargetDedicatedNodes`` and
        ``$TargetLowPriorityNodes`` from the configured maximums.

        Args:
            job_configuration: the job description. ``applications`` may be
                ``None`` or empty, in which case no application task is
                generated.

        Returns:
            models.Job: wrapper around the object returned by the submission.

        Raises:
            error.AztkError: if a ``batch_error.BatchErrorException`` is
                raised along the way.
        """
        try:
            job_configuration = _apply_default_for_job_config(
                job_configuration)
            job_configuration.validate()
            cluster_data = self._get_cluster_data(job_configuration.id)
            node_data = NodeData(
                job_configuration.to_cluster_config()).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(
                node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self,
                zip_resource_files,
                job_configuration.id,
                job_configuration.gpu_enabled,
                job_configuration.get_docker_repo(),
                mixed_mode=job_configuration.mixed_mode(),
                worker_on_master=job_configuration.worker_on_master)

            # Treat a missing application list as empty everywhere, matching
            # the guard already used when building the application metadata.
            applications = job_configuration.applications or []
            application_tasks = [
                (application,
                 cluster_submit_helper.generate_task(
                     self, job_configuration.id, application))
                for application in applications
            ]

            job_manager_task = job_submit_helper.generate_task(
                self, job_configuration, application_tasks)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            autoscale_formula = "$TargetDedicatedNodes = {0}; " \
                                "$TargetLowPriorityNodes = {1}".format(
                                    job_configuration.max_dedicated_nodes,
                                    job_configuration.max_low_pri_nodes)

            job = self.__submit_job(
                job_configuration=job_configuration,
                start_task=start_task,
                job_manager_task=job_manager_task,
                autoscale_formula=autoscale_formula,
                software_metadata_key=software_metadata_key,
                vm_image_model=vm_image,
                application_metadata='\n'.join(
                    application.name for application in applications))

            return models.Job(job)

        except batch_error.BatchErrorException as e:
            # Chain explicitly so the original Batch error stays attached.
            raise error.AztkError(helpers.format_batch_exception(e)) from e
Esempio n. 4
0
def submit_job(core_job_operations,
               spark_job_operations,
               job_configuration: models.JobConfiguration,
               wait: bool = False):
    """Submit a Spark job described by ``job_configuration``.

    Defaults are applied to the configuration and it is validated first. The
    function then builds the cluster start task, one task per application and
    the job manager task, and submits them with an autoscale formula that
    sets ``$TargetDedicatedNodes`` and ``$TargetLowPriorityNodes`` from the
    configured maximums.

    Args:
        core_job_operations: object providing ``get_cluster_data`` and
            ``submit``; also handed to the task generators.
        spark_job_operations: object providing
            ``_generate_cluster_start_task``, ``_generate_application_task``
            and ``wait``.
        job_configuration: the job description. ``applications`` may be
            ``None`` or empty, in which case no application task is generated.
        wait: when true, ``spark_job_operations.wait`` is called with the job
            id before returning.

    Returns:
        models.Job: wrapper around the object returned by
        ``core_job_operations.submit``.

    Raises:
        error.AztkError: if a ``batch_error.BatchErrorException`` is raised
            along the way.
    """
    try:
        job_configuration = _apply_default_for_job_config(job_configuration)
        job_configuration.validate()
        cluster_data = core_job_operations.get_cluster_data(job_configuration.id)
        node_data = NodeData(job_configuration.to_cluster_config()).add_core().done()
        zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()

        start_task = spark_job_operations._generate_cluster_start_task(
            core_job_operations,
            zip_resource_files,
            job_configuration.id,
            job_configuration.gpu_enabled,
            job_configuration.get_docker_repo(),
            job_configuration.get_docker_run_options(),
            mixed_mode=job_configuration.mixed_mode(),
            worker_on_master=job_configuration.worker_on_master,
        )

        # Treat a missing application list as empty everywhere, matching the
        # guard already used when building the application metadata.
        applications = job_configuration.applications or []
        application_tasks = [
            (
                application,
                spark_job_operations._generate_application_task(core_job_operations, job_configuration.id, application),
            )
            for application in applications
        ]

        job_manager_task = generate_job_manager_task(core_job_operations, job_configuration, application_tasks)

        software_metadata_key = base_models.Software.spark

        vm_image = models.VmImage(publisher="Canonical", offer="UbuntuServer", sku="16.04")

        autoscale_formula = "$TargetDedicatedNodes = {0}; $TargetLowPriorityNodes = {1}".format(
            job_configuration.max_dedicated_nodes, job_configuration.max_low_pri_nodes)

        job = core_job_operations.submit(
            job_configuration=job_configuration,
            start_task=start_task,
            job_manager_task=job_manager_task,
            autoscale_formula=autoscale_formula,
            software_metadata_key=software_metadata_key,
            vm_image_model=vm_image,
            application_metadata="\n".join(application.name for application in applications),
        )

        if wait:
            spark_job_operations.wait(id=job_configuration.id)

        return models.Job(job)

    except batch_error.BatchErrorException as e:
        # Chain explicitly so the original Batch error stays attached.
        raise error.AztkError(helpers.format_batch_exception(e)) from e
Esempio n. 5
0
File: client.py Progetto: gridl/aztk
    def create_cluster(self, configuration: models.ClusterConfiguration, wait: bool = False):
        """
        Create a new aztk spark cluster

        The effective configuration is a fresh ``ClusterConfiguration`` with
        ``DEFAULT_CLUSTER_CONFIG`` merged in first and ``configuration`` merged
        on top; it is validated before anything else happens.

        Args:
            configuration(aztk.spark.models.models.ClusterConfiguration): Configuration for the cluster to be created
            wait(bool): If you should wait for the cluster to be ready before returning

        Returns:
            aztk.spark.models.Cluster

        Raises:
            error.AztkError: when a ``batch_error.BatchErrorException`` is
                raised after the cluster data has been obtained.
        """
        cluster_conf = models.ClusterConfiguration()
        for layer in (DEFAULT_CLUSTER_CONFIG, configuration):
            cluster_conf.merge(layer)
        cluster_conf.validate()

        cluster_data = self._get_cluster_data(cluster_conf.cluster_id)
        try:
            prepared_node_data = NodeData(cluster_conf).add_core().done()
            uploaded = cluster_data.upload_node_data(prepared_node_data)
            zip_resource_files = uploaded.to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self,
                zip_resource_files,
                cluster_conf.cluster_id,
                cluster_conf.gpu_enabled(),
                cluster_conf.get_docker_repo(),
                cluster_conf.file_shares,
                cluster_conf.plugins,
                cluster_conf.mixed_mode(),
                cluster_conf.worker_on_master)

            image = models.VmImage(publisher='Canonical', offer='UbuntuServer', sku='16.04')

            cluster = self.__create_pool_and_job(cluster_conf, "spark", start_task, image)

            if not wait:
                return cluster

            # Block until the master is ready, then return the refreshed cluster.
            util.wait_for_master_to_be_ready(self, cluster.id)
            return self.get_cluster(cluster.id)

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
Esempio n. 6
0
    def create_cluster(self,
                       cluster_conf: models.ClusterConfiguration,
                       wait: bool = False):
        """
        Create a new aztk spark cluster

        The configuration is validated before anything else happens.

        Args:
            cluster_conf: configuration for the cluster to be created
            wait(bool): when true, wait for the master to be ready and return
                the cluster as re-read through ``get_cluster``

        Returns:
            The cluster produced by the pool/job creation, or the re-read
            cluster when ``wait`` is true.

        Raises:
            error.AztkError: when a ``batch_error.BatchErrorException`` is
                raised after the cluster data has been obtained.
        """
        cluster_conf.validate()
        cluster_data = self._get_cluster_data(cluster_conf.cluster_id)
        try:
            prepared_node_data = NodeData(cluster_conf).add_core().done()
            uploaded = cluster_data.upload_node_data(prepared_node_data)
            zip_resource_files = uploaded.to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self,
                zip_resource_files,
                cluster_conf.gpu_enabled(),
                cluster_conf.docker_repo,
                cluster_conf.file_shares,
                cluster_conf.plugins,
                cluster_conf.mixed_mode(),
                cluster_conf.worker_on_master)

            image = models.VmImage(
                publisher='Canonical', offer='UbuntuServer', sku='16.04')

            cluster = self.__create_pool_and_job(
                cluster_conf, "spark", start_task, image)

            if not wait:
                return cluster

            # Block until the master is ready, then return the refreshed cluster.
            util.wait_for_master_to_be_ready(self, cluster.id)
            return self.get_cluster(cluster.id)

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))