def start_schedule(self, deployment_path: str):
        """Load a schedule deployment, verify cluster resource, and queue each of its jobs.

        Args:
            deployment_path (str): Path to the schedule deployment YAML file.

        Raises:
            BadRequestError: If the cluster's master resource cannot satisfy the
                schedule's total request.
        """
        # Load and normalize the schedule deployment.
        with open(deployment_path, "r") as fr:
            start_schedule_deployment = yaml.safe_load(fr)

        schedule_name = start_schedule_deployment["name"]
        start_schedule_deployment = self._completed_local_job_deployment(start_schedule_deployment)

        # Reject the schedule up front if the cluster can never satisfy its total request.
        is_satisfied, _ = resource_op(
            self.cluster_details["master"]["resource"],
            start_schedule_deployment["total_request_resource"],
            ResourceOperation.ALLOCATION
        )
        if not is_satisfied:
            raise BadRequestError(f"No enough resource to start schedule {schedule_name} in {self.cluster_name}.")

        # Push schedule details to Redis under the schedule's own name.
        self._redis_connection.hset(
            f"{self.cluster_name}:job_details",
            schedule_name,
            json.dumps(start_schedule_deployment)
        )

        # Turn the schedule details into a per-job template (everything but the job list).
        job_template = copy.deepcopy(start_schedule_deployment)
        del job_template["job_names"]

        for job_name in start_schedule_deployment["job_names"]:
            # FIX: give every job its own copy. The original mutated a single
            # shared dict each iteration, so any consumer holding a reference
            # (rather than serializing immediately) would observe only the
            # last job's name.
            job_detail = copy.deepcopy(job_template)
            job_detail["name"] = job_name
            self._push_pending_job(job_detail)
    def delete(self):
        """Tear this cluster down: remove local files, stop agents, and return its resource."""
        logger.info(f"Deleting cluster {self.cluster_name}")

        # Remove the cluster's local directory; ignore errors if it is already gone.
        shutil.rmtree(f"{GlobalPaths.ABS_MARO_CLUSTERS}/{self.cluster_name}", True)

        # Stop the agents serving this cluster.
        self._agents_stop()

        # Give the cluster's resource back to the global pool.
        freed = self.cluster_details["master"]["resource"]
        pool = self._resource_redis.get_available_resource()
        _, new_pool = resource_op(pool, freed, ResourceOperation.RELEASE)
        self._resource_redis.set_available_resource(new_pool)

        # Detach this cluster from the resource Redis.
        self._resource_redis.sub_cluster()

        # Drop this cluster's data from the local Redis.
        self._redis_clear()

        logger.info(f"{self.cluster_name} is deleted.")
# Example #3 (scraping artifact — commented out so the file parses)
# 0
    def _check_pending_ticket(self):
        """Scan pending job tickets and start every job the cluster can currently afford."""
        ticket_key = f"{self.cluster_name}:pending_job_tickets"
        runtime_key = f"{self.cluster_name}:runtime_detail"

        for job_name in self.redis_connection.lrange(ticket_key, 0, -1):
            job_detail = json.loads(self.redis_connection.hget(f"{self.cluster_name}:job_details", job_name))

            # Re-read the free resource each pass — a started job shrinks it.
            free_resource = json.loads(self.redis_connection.hget(runtime_key, "available_resource"))
            allocatable, remaining = resource_op(
                free_resource,
                job_detail["total_request_resource"],
                ResourceOperation.ALLOCATION
            )
            if not allocatable:
                # Not enough free resource now; leave the ticket for a later pass.
                continue

            # Start the job, consume its ticket, then persist the shrunken free resource.
            self._start_job(job_detail)
            self.redis_connection.lrem(ticket_key, 0, job_name)
            self.redis_connection.hset(runtime_key, "available_resource", json.dumps(remaining))
# Example #4 (scraping artifact — commented out so the file parses)
# 0
    def _job_clear(self, job_name: str, release_resource: dict):
        """Return a finished job's resource to this cluster's available pool.

        Args:
            job_name (str): Name of the job being cleared (kept for interface parity;
                not consulted here).
            release_resource (dict): The resource amounts to hand back.
        """
        runtime_key = f"{self.cluster_name}:runtime_detail"
        free_resource = json.loads(self.redis_connection.hget(runtime_key, "available_resource"))

        # Only the updated pool matters for a release; the satisfied flag is discarded.
        _, new_free = resource_op(free_resource, release_resource, ResourceOperation.RELEASE)

        self.redis_connection.hset(runtime_key, "available_resource", json.dumps(new_free))
    def start_job(self, deployment_path: str):
        """Load a job deployment from YAML, verify cluster resource, and queue the job.

        Raises:
            BadRequestError: If the cluster's master resource cannot satisfy the
                job's total request.
        """
        # Read and normalize the job deployment document.
        with open(deployment_path, "r") as fr:
            deployment = yaml.safe_load(fr)
        deployment = self._completed_local_job_deployment(deployment)

        # Reject the job up front if the cluster can never satisfy its request.
        satisfiable, _ = resource_op(
            self.cluster_details["master"]["resource"],
            deployment["total_request_resource"],
            ResourceOperation.ALLOCATION
        )
        if not satisfiable:
            raise BadRequestError(f"No enough resource to start job {deployment['name']}.")

        self._push_pending_job(deployment)
    def create(self):
        """Create the cluster: claim its resource from the global pool and start its agents.

        Raises:
            BadRequestError: If the cluster directory already exists, or the global
                pool cannot satisfy the cluster's resource request.
        """
        logger.info("Creating cluster")

        # An existing cluster directory on disk means the name is already taken.
        if os.path.isdir(f"{GlobalPaths.ABS_MARO_CLUSTERS}/{self.cluster_name}"):
            raise BadRequestError(f"Cluster '{self.cluster_name}' is exist.")

        # Register this cluster with the resource Redis.
        self._resource_redis.add_cluster()

        # Try to carve this cluster's resource out of the global pool.
        requested = self.cluster_details["master"]["resource"]
        pool = self._resource_redis.get_available_resource()
        satisfiable, remaining_pool = resource_op(pool, requested, ResourceOperation.ALLOCATION)
        if not satisfiable:
            # Roll the registration back before failing.
            self._resource_redis.sub_cluster()
            raise BadRequestError("No enough resource for this cluster.")
        self._resource_redis.set_available_resource(remaining_pool)

        # Start the cluster's agents.
        self._agents_start()

        # Seed the cluster's own available-resource record with its full allotment.
        self._redis_connection.hset(
            f"{self.cluster_name}:runtime_detail",
            "available_resource",
            json.dumps(requested)
        )

        # Persist the cluster configuration locally.
        DetailsWriter.save_cluster_details(
            cluster_name=self.cluster_name,
            cluster_details=self.cluster_details
        )

        logger.info(f"{self.cluster_name} is created.")