Beispiel #1
0
    def _create_k8s_cluster(cluster_details: dict) -> None:
        """Deploy the AKS cluster of the MARO Cluster and attach its ACR.

        Exports the ARM parameter file, starts the "aks_cluster" deployment and
        then attaches the registry "<id>acr" to the AKS cluster "<id>-aks".

        Args:
            cluster_details (dict): details of the MARO Cluster. The keys "name",
                "id" and ["cloud"]["resource_group"] are read.

        Returns:
            None.
        """
        logger.info("Creating k8s cluster")

        cluster_id = cluster_details["id"]
        resource_group = cluster_details["cloud"]["resource_group"]

        # The parameter file is generated per cluster, the template ships with the library
        arm_template = f"{K8sPaths.ABS_MARO_K8S_LIB}/modes/aks/create_aks_cluster/template.json"
        arm_parameters = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{cluster_details['name']}/parameters/create_aks_cluster.json"
        ArmTemplateParameterBuilder.create_aks_cluster(
            cluster_details=cluster_details,
            export_path=arm_parameters,
        )
        AzureController.start_deployment(
            resource_group=resource_group,
            deployment_name="aks_cluster",
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )

        # Attach the container registry to the freshly deployed AKS cluster
        AzureController.attach_acr(
            resource_group=resource_group,
            aks_name=f"{cluster_id}-aks",
            acr_name=f"{cluster_id}acr",
        )

        logger.info_green("K8s cluster is created")
Beispiel #2
0
    def _delete_node(self, node_name: str) -> None:
        """Remove a MARO Node together with its cloud resources and local files.

        The steps run in this order: unregister the node through the master API
        client, delete the Azure resources that belong to the node, delete the
        Azure deployment named after the node, and finally remove the node's
        folder under the local cluster directory.

        Args:
            node_name (str): name of the MARO Node.

        Returns:
            None.
        """
        logger.info(f"Deleting node '{node_name}'")

        # 1. Unregister the node from the master
        self.master_api_client.delete_node(node_name=node_name)

        # 2. Remove the Azure resources created for this node
        self._delete_resources(
            resource_group=self.resource_group,
            cluster_id=self.cluster_id,
            resource_name=node_name,
        )

        # 3. Remove the deployment record (deployments are named after the node)
        AzureController.delete_deployment(
            resource_group=self.resource_group,
            deployment_name=node_name,
        )

        # 4. Remove the local files kept for the node
        node_dir = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{self.cluster_name}/nodes/{node_name}"
        shutil.rmtree(node_dir)

        logger.info_green(f"Node '{node_name}' is deleted")
Beispiel #3
0
    def _create_vnet(cluster_details: dict) -> None:
        """Deploy the virtual network of the MARO Cluster.

        Exports the ARM parameter file into the cluster's "vnet" folder and starts
        the Azure deployment named "vnet".

        Args:
            cluster_details (dict): details of the MARO Cluster. The keys "name"
                and ["cloud"]["resource_group"] are read.

        Returns:
            None.
        """
        logger.info("Creating vnet")

        arm_template = f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_vnet/template.json"
        arm_parameters = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{cluster_details['name']}/vnet/arm_create_vnet_parameters.json"

        # Generate the parameter file first, then hand both files to the deployment
        ArmTemplateParameterBuilder.create_vnet(
            cluster_details=cluster_details,
            export_path=arm_parameters,
        )
        AzureController.start_deployment(
            resource_group=cluster_details["cloud"]["resource_group"],
            deployment_name="vnet",
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )

        logger.info_green("Vnet is created")
Beispiel #4
0
    def _delete_resources(resource_group: str, cluster_id: str,
                          resource_name: str) -> None:
        """Delete the resources of one MARO Resource from the resource group.

        Every resource in the group whose name starts with
        "<cluster_id>-<resource_name>" is deleted. Nothing is sent to Azure when
        no resource matches.

        Args:
            resource_group (str): name of the resource group.
            cluster_id (str): id of the MARO Cluster.
            resource_name (str): name of the MARO Resource. e.g. node_name

        Returns:
            None.
        """
        # The prefix is the same for every resource, so build it once
        name_prefix = f"{cluster_id}-{resource_name}"

        # NOTE(review): this is a plain prefix match, so a resource_name that is itself
        # a prefix of another one (e.g. "node1" / "node10") would match both -- confirm
        # that resource names can never collide this way.
        deletable_ids = [
            resource_info["id"]
            for resource_info in AzureController.list_resources(resource_group=resource_group)
            if resource_info["name"].startswith(name_prefix)
        ]

        # Skip the delete call entirely when there is nothing to delete
        if deletable_ids:
            AzureController.delete_resources(resource_ids=deletable_ids)
Beispiel #5
0
    def delete(self) -> None:
        """Delete the MARO Cluster.

        Deletes every resource in the resource group whose name starts with the
        cluster id, then removes the local cluster folder.

        Returns:
            None.
        """
        logger.info(f"Deleting cluster '{self.cluster_name}'")

        # Collect the ids of all resources that carry the cluster id as name prefix
        all_resources = AzureController.list_resources(resource_group=self.resource_group)
        deletable_ids = [
            entry["id"]
            for entry in all_resources
            if entry["name"].startswith(self.cluster_id)
        ]

        # Only call Azure when something matched
        if deletable_ids:
            AzureController.delete_resources(resource_ids=deletable_ids)

        # Remove the local bookkeeping of the cluster
        cluster_dir = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{self.cluster_name}"
        shutil.rmtree(cluster_dir)

        logger.info_green(f"Cluster '{self.cluster_name}' is deleted")
Beispiel #6
0
    def _create_master_vm(cluster_details: dict) -> None:
        """Deploy the MARO Master VM and record its connection details.

        Starts the "master" ARM deployment, looks up the IP addresses of the VM
        "<id>-master-vm" and writes hostname, username, IP addresses, resource name
        and the ssh / api_server ports into cluster_details["master"] (in place).

        Args:
            cluster_details (dict): details of the MARO Cluster.

        Returns:
            None.
        """
        logger.info("Creating Master VM")

        resource_group = cluster_details["cloud"]["resource_group"]
        vm_name = f"{cluster_details['id']}-master-vm"

        # Export the ARM parameters and run the deployment
        arm_template = f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_master/template.json"
        arm_parameters = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{cluster_details['name']}/master/arm_create_master_parameters.json"
        ArmTemplateParameterBuilder.create_master(
            cluster_details=cluster_details,
            node_size=cluster_details["master"]["node_size"],
            export_path=arm_parameters,
        )
        AzureController.start_deployment(
            resource_group=resource_group,
            deployment_name="master",
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )

        # Only the first listed entry and its first public / private address are used
        vm_addresses = AzureController.list_ip_addresses(
            resource_group=resource_group,
            vm_name=vm_name,
        )
        network = vm_addresses[0]["virtualMachine"]["network"]
        public_ip_address = network["publicIpAddresses"][0]["ipAddress"]
        private_ip_address = network["privateIpAddresses"][0]

        # Fill the master section of cluster_details; the VM name doubles as hostname
        username = cluster_details["cloud"]["default_username"]
        cluster_details["master"].update({
            "hostname": vm_name,
            "username": username,
            "public_ip_address": public_ip_address,
            "private_ip_address": private_ip_address,
            "resource_name": vm_name,
            "ssh": {"port": cluster_details["connection"]["ssh"]["port"]},
            "api_server": {"port": cluster_details["connection"]["api_server"]["port"]},
        })
        logger.info_green(f"You can login to your master node with: {username}@{public_ip_address}")

        logger.info_green("Master VM is created")
0
    def _load_k8s_context(cluster_id: str, resource_group: str) -> None:
        """Load the k8s context.

        Set current k8s context (only in the CLI runtime) to the k8s cluster that related to the MARO Cluster.
        The AKS cluster and the kube context are both addressed as "<cluster_id>-aks".

        Args:
            cluster_id (str): id of the MARO Cluster.
            resource_group (str): name of the resource group.

        Returns:
            None.
        """
        aks_name = f"{cluster_id}-aks"

        # Load the AKS context through Azure first, then select the context of the same name
        AzureController.load_aks_context(resource_group=resource_group,
                                         aks_name=aks_name)
        config.load_kube_config(context=aks_name)
Beispiel #8
0
    def _create_storage_account_secret(cluster_details: dict) -> None:
        """Create the k8s secret holding the storage account credentials.

        The secret is named "azure-storage-account-secret", lives in the "default"
        namespace and stores the base64-encoded name and first key of the storage
        account "<id>st". The secret is used in Azure File Service.

        Args:
            cluster_details (dict): details of the MARO Cluster. The keys "id" and
                ["cloud"]["resource_group"] are read.

        Returns:
            None.
        """
        account_name = f"{cluster_details['id']}st"

        # Only the first key returned for the account is used
        account_keys = AzureController.get_storage_account_keys(
            resource_group=cluster_details["cloud"]["resource_group"],
            storage_account_name=account_name,
        )
        account_key = account_keys[0]["value"]

        # Both values are stored as base64 text in the secret's data
        secret_data = {
            "azurestorageaccountname": base64.b64encode(account_name.encode()).decode(),
            "azurestorageaccountkey": base64.b64encode(account_key.encode()).decode(),
        }
        secret = client.V1Secret(
            metadata=client.V1ObjectMeta(name="azure-storage-account-secret"),
            data=secret_data,
        )
        client.CoreV1Api().create_namespaced_secret(body=secret, namespace="default")
Beispiel #9
0
    def tearDownClass(cls) -> None:
        """Report the recorded timings and clean up after the test class.

        Prints cls.test_func_to_time as sorted, indented JSON, deletes the Azure
        resource group, removes the temporary test folder and finally tries to
        remove the docker image "maro_runtime_cpu:test".

        Returns:
            None.
        """
        # Report the collected timings
        timing_report = json.dumps(cls.test_func_to_time, indent=4, sort_keys=True)
        print(timing_report)

        # Cloud side cleanup
        AzureController.delete_resource_group(resource_group=cls.resource_group)

        # Local cleanup of the per-run test folder
        test_dir = f"{GlobalPaths.ABS_MARO_TEST}/{cls.test_id}"
        shutil.rmtree(test_dir)

        # Removing the image is best-effort: a failing command is ignored on purpose
        try:
            Subprocess.run(command="docker rmi maro_runtime_cpu:test")
        except CommandExecutionError:
            pass
Beispiel #10
0
    def list_image(self):
        """Log the repositories of the cluster's Azure Container Registry.

        The registry queried is "<cluster_id>acr"; the result is written through
        logger.info as returned.

        Returns:
            None.
        """
        registry_name = f"{self.cluster_id}acr"
        repositories = AzureController.list_acr_repositories(acr_name=registry_name)
        logger.info(repositories)
Beispiel #11
0
    def _start_node(self, node_name: str):
        """Start a MARO Node: first its VM, then the node itself.

        Args:
            node_name (str): name of the MARO Node.

        Returns:
            None.
        """
        logger.info(f"Starting node '{node_name}'")

        # The VM has to be started before the master is asked to start the node
        vm_name = f"{self.cluster_id}-{node_name}-vm"
        AzureController.start_vm(
            resource_group=self.resource_group,
            vm_name=vm_name,
        )
        self.master_api_client.start_node(node_name=node_name)

        logger.info_green(f"Node '{node_name}' is started")
Beispiel #12
0
    def _stop_node(self, node_details: dict):
        """Stop a MARO Node: first the node itself, then its VM.

        Args:
            node_details (dict): details of the MARO Node; only "name" is read.

        Returns:
            None.
        """
        node_name = node_details["name"]
        logger.info(f"Stopping node '{node_name}'")

        # Ask the master to stop the node before the VM underneath is stopped
        self.master_api_client.stop_node(node_name=node_name)
        vm_name = f"{self.cluster_id}-{node_name}-vm"
        AzureController.stop_vm(
            resource_group=self.resource_group,
            vm_name=vm_name,
        )

        logger.info_green(f"Node '{node_name}' is stopped")
Beispiel #13
0
    def push_image(self, image_name: str) -> None:
        """Push a local docker image to the cluster's Azure Container Registry.

        Logs in to the registry "<cluster_id>acr", tags the local image as
        "<cluster_id>acr.azurecr.io/<image_name>" and pushes that tag.

        Args:
            image_name (str): name of the local image that loaded in the docker.

        Returns:
            None.
        """
        registry_name = f"{self.cluster_id}acr"
        remote_image_name = f"{registry_name}.azurecr.io/{image_name}"

        # Authenticate against the registry before talking to it through docker
        AzureController.login_acr(acr_name=registry_name)

        # Tag the local image with the registry address, then push that tag
        Subprocess.run(command=f"docker tag {image_name} {remote_image_name}")
        Subprocess.run(command=f"docker push {remote_image_name}")
Beispiel #14
0
    def _create_resource_group(cluster_details: dict) -> None:
        """Select the subscription and make sure the resource group exists.

        Logs the Azure CLI version, sets the active subscription and creates the
        resource group at the configured location unless a lookup shows it already
        exists (in which case only a warning is logged).

        Args:
            cluster_details (dict): details of the cluster. The keys "subscription",
                "resource_group" and "location" under "cloud" are read.

        Returns:
            None.
        """
        cloud_details = cluster_details["cloud"]
        subscription = cloud_details["subscription"]
        resource_group = cloud_details["resource_group"]

        # Querying the version also verifies that the Azure CLI can be called
        cli_versions = AzureController.get_version()
        logger.info_green(f"Your Azure CLI version: {cli_versions['azure-cli']}")

        AzureController.set_subscription(subscription=subscription)
        logger.info_green(f"Set subscription to '{subscription}'")

        # Nothing to create when the group is already there
        if AzureController.get_resource_group(resource_group=resource_group):
            logger.warning_yellow(f"Azure resource group '{resource_group}' already exists")
            return

        AzureController.create_resource_group(
            resource_group=resource_group,
            location=cloud_details["location"],
        )
        logger.info_green(f"Resource group '{resource_group}' is created")
Beispiel #15
0
    def create_node(node_name: str, cluster_details: dict, node_size: str,
                    export_path: str) -> dict:
        """Build the ARM parameters for a MARO Node VM and optionally export them.

        The base parameter file shipped with the library is loaded and the "value"
        of each node-specific parameter is filled in. Resource names are derived as
        "<cluster id>-<node_name>-<suffix>".

        Args:
            node_name (str): name of the MARO Node.
            cluster_details (dict): details of the MARO Cluster.
            node_size (str): node_size of the MARO Node VM.
            export_path (str): path to export the parameter file. A falsy value
                skips the export.

        Returns:
            dict: parameter dict, should be exported to json.
        """
        # Load the base parameters
        with open(
                file=
                f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_node/parameters.json",
                mode="r") as fr:
            base_parameters = json.load(fr)

        cloud_details = cluster_details["cloud"]
        cluster_id = cluster_details["id"]
        name_prefix = f"{cluster_id}-{node_name}"

        # Parameter name -> value to store under its "value" key
        overrides = {
            "adminPublicKey": cloud_details["default_public_key"],
            "adminUsername": cloud_details["default_username"],
            "imageResourceId": AzureController.get_image_resource_id(
                resource_group=cloud_details["resource_group"],
                image_name=f"{cluster_id}-node-image"),
            "location": cloud_details["location"],
            "networkInterfaceName": f"{name_prefix}-nic",
            "networkSecurityGroupName": f"{name_prefix}-nsg",
            "publicIpAddressName": f"{name_prefix}-pip",
            "sshDestinationPorts": [cluster_details["connection"]["ssh"]["port"]],
            "virtualMachineName": f"{name_prefix}-vm",
            "virtualMachineSize": node_size,
            "virtualNetworkName": f"{cluster_id}-vnet",
        }
        parameters = base_parameters["parameters"]
        for parameter_name, parameter_value in overrides.items():
            parameters[parameter_name]["value"] = parameter_value

        # Write the file only when a path is given; the parent folder is created on demand
        if export_path:
            os.makedirs(os.path.dirname(export_path), exist_ok=True)
            with open(export_path, "w") as fw:
                json.dump(base_parameters, fw, indent=4)

        return base_parameters
Beispiel #16
0
    def _scale_node_pool(self, replicas: int, node_size: str,
                         node_size_to_info: dict):
        """Scale the existing node pool of the given node_size.

        Args:
            replicas (int): node count that is passed to the scale request.
            node_size (str): size of the MARO Node VM,
                see https://docs.microsoft.com/en-us/azure/virtual-machines/sizes for reference.
            node_size_to_info (dict): node_size to info mapping; the pool name is
                taken from node_size_to_info[node_size]["name"].

        Returns:
            None.
        """
        logger.info(f"Scaling '{node_size}' nodepool")

        # The pool to scale is identified through the info recorded for this node_size
        pool_name = node_size_to_info[node_size]["name"]
        AzureController.scale_nodepool(
            resource_group=self.resource_group,
            aks_name=f"{self.cluster_id}-aks",
            nodepool_name=pool_name,
            node_count=replicas,
        )

        logger.info_green(f"'{node_size}' nodepool is scaled")
Beispiel #17
0
    def _build_node_pool(self, replicas: int, node_size: str) -> None:
        """Add a new node pool for the given node_size to the AKS cluster.

        Args:
            replicas (int): node count the new pool is created with.
            node_size (str): size of the MARO Node VM,
                see https://docs.microsoft.com/en-us/azure/virtual-machines/sizes for reference.

        Returns:
            None.
        """
        logger.info(f"Building '{node_size}' nodepool")

        # The pool name is generated from the node_size
        pool_name = K8sAksExecutor._generate_nodepool_name(node_size=node_size)
        AzureController.add_nodepool(
            resource_group=self.resource_group,
            aks_name=f"{self.cluster_id}-aks",
            nodepool_name=pool_name,
            node_count=replicas,
            node_size=node_size,
        )

        logger.info_green(f"'{node_size}' nodepool is built")
Beispiel #18
0
    def _prepare_join_cluster_deployment(cls, join_cluster_deployment: dict):
        """Fill in the node addresses of a join-cluster deployment and save it.

        The hostname and IP addresses of the VM "node-vm" are written into
        join_cluster_deployment["node"] (in place), then the whole dict is dumped
        as YAML to cls.join_cluster_deployment_path.

        Args:
            join_cluster_deployment (dict): deployment dict to complete; must
                already contain a "node" section.
        """
        # Look up the addresses of the node VM
        vm_addresses = AzureController.list_ip_addresses(
            resource_group=cls.resource_group,
            vm_name="node-vm",
        )

        # Complete the node section; only the first public / private address is used
        node_section = join_cluster_deployment["node"]
        node_section["hostname"] = "node-vm"
        network = vm_addresses[0]["virtualMachine"]["network"]
        node_section["public_ip_address"] = network["publicIpAddresses"][0]["ipAddress"]
        node_section["private_ip_address"] = network["privateIpAddresses"][0]

        # Persist the completed deployment
        with open(file=cls.join_cluster_deployment_path, mode="w") as fw:
            yaml.safe_dump(data=join_cluster_deployment, stream=fw)
Beispiel #19
0
    def _get_node_size_to_spec(self) -> dict:
        """Map every VM size available at self.location to its spec.

        Returns:
            dict: node_size to spec mapping, keyed by the "name" of each spec.
        """
        available_specs = AzureController.list_vm_sizes(location=self.location)

        # A later spec with the same name replaces an earlier one
        return {vm_spec["name"]: vm_spec for vm_spec in available_specs}
Beispiel #20
0
    def _get_node_size_to_info(self) -> dict:
        """Map the VM size of each node pool of the K8s Cluster to the pool's info.

        Returns:
            dict: node_size to info mapping, keyed by the "vmSize" of each node pool.
        """
        pools = AzureController.list_nodepool(
            resource_group=self.resource_group,
            aks_name=f"{self.cluster_id}-aks",
        )

        # If two pools share a vmSize, the one listed later is kept
        return {pool["vmSize"]: pool for pool in pools}
Beispiel #21
0
    def _build_image_address(self, image_name: str) -> str:
        """Resolve the address under which an image should be referenced.

        Args:
            image_name (str): name of the image.

        Returns:
            str: "<cluster_id>acr.azurecr.io/<image_name>" when the image is listed
                in the cluster's Azure Container Registry, otherwise image_name
                unchanged.
        """
        known_repositories = AzureController.list_acr_repositories(
            acr_name=f"{self.cluster_id}acr")

        # Images that are not in the cluster registry are returned untouched
        if image_name not in known_repositories:
            return image_name
        return f"{self.cluster_id}acr.azurecr.io/{image_name}"
Beispiel #22
0
    def list_node(self) -> None:
        """Log the node count per VM size of the AKS cluster.

        Reads the agent pool profiles of "<cluster_id>-aks" and logs a sorted,
        indented JSON object that maps each pool's "vmSize" to its "count".

        Returns:
            None.
        """
        aks_details = AzureController.get_aks(
            resource_group=self.resource_group,
            aks_name=f"{self.cluster_id}-aks",
        )

        # vmSize -> count; pools sharing a vmSize overwrite each other, the last one wins
        size_to_count = {
            pool["vmSize"]: pool["count"]
            for pool in aks_details["agentPoolProfiles"]
        }
        logger.info(json.dumps(size_to_count, indent=4, sort_keys=True))
Beispiel #23
0
    def _check_and_get_account_sas(self) -> str:
        """Return the account sas token, generating and persisting it if missing.

        A token is requested for the storage account "<cluster_id>st" only when
        the cloud details hold no "account_sas" entry yet; it is then stored in
        the cluster_details and saved. An existing token is returned as is --
        expiry is not checked here (open TODO from the original author).

        Ref: https://msdn.microsoft.com/library/azure/mt584140.aspx

        Returns:
            str: account sas token.
        """
        cloud_details = self.cluster_details["cloud"]

        # Reuse the stored token when there is one
        if "account_sas" in cloud_details:
            return cloud_details["account_sas"]

        # Otherwise request a token and persist it with the cluster details
        cloud_details["account_sas"] = AzureController.get_storage_account_sas(
            account_name=f"{self.cluster_id}st")
        DetailsWriter.save_cluster_details(
            cluster_name=self.cluster_name,
            cluster_details=self.cluster_details,
        )
        return cloud_details["account_sas"]
Beispiel #24
0
 def _create_virtual_machines(cls, test_config: dict):
     """Deploy the virtual machines needed by the test class.

     Exports the ARM parameters, selects the subscription, creates the resource
     group and starts the ARM deployment named "cluster".

     Args:
         test_config (dict): test configuration; the keys
             "cloud/default_public_key" and "cloud/subscription" are read.
     """
     # Settings handed to the ARM parameter builder
     build_config = {
         "location": cls.location,
         "default_username": cls.default_username,
         "default_public_key": test_config["cloud/default_public_key"],
         "ssh": {"port": GlobalParams.DEFAULT_SSH_PORT},
         "api_server": {"port": GrassParams.DEFAULT_API_SERVER_PORT},
     }
     cls.build_arm_parameters(
         build_config=build_config,
         export_path=cls.arm_parameters_file_export_path,
     )

     # Subscription and resource group come first, then the deployment itself
     AzureController.set_subscription(subscription=test_config["cloud/subscription"])
     AzureController.create_resource_group(
         resource_group=cls.resource_group,
         location=cls.location,
     )
     AzureController.start_deployment(
         resource_group=cls.resource_group,
         deployment_name="cluster",
         template_file_path=cls.arm_template_file_path,
         parameters_file_path=cls.arm_parameters_file_export_path,
     )
Beispiel #25
0
    def setUpClass(cls) -> None:
        """Provision the Azure resources shared by the tests of this class.

        Creates a per-run temp folder keyed by a random 8-character test id,
        validates config.yml (next to this test file), creates the resource group,
        writes the ARM parameter file, starts the deployment and finally stores the
        storage connection string and the VM's public IP address on the class.

        Raises:
            Exception: if "cloud/subscription" or "user/admin_public_key" in the
                config is empty.
        """
        # Per-run identifiers and folders
        GlobalParams.LOG_LEVEL = logging.DEBUG
        cls.test_id = uuid.uuid4().hex[:8]
        test_tmp_dir = os.path.expanduser(f"{GlobalPaths.MARO_TEST}/{cls.test_id}")
        os.makedirs(test_tmp_dir, exist_ok=True)
        cls.test_file_path = os.path.abspath(__file__)
        cls.test_dir_path = os.path.dirname(cls.test_file_path)

        # Load the config and refuse to continue without the two required entries
        cls.config_path = os.path.normpath(os.path.join(cls.test_dir_path, "./config.yml"))
        with open(cls.config_path) as fr:
            config_details = yaml.safe_load(fr)
        if not (config_details["cloud/subscription"] and config_details["user/admin_public_key"]):
            raise Exception("Invalid config")

        # Create resource group
        AzureController.create_resource_group(cls.resource_group, cls.location)

        # Locations of the ARM template, the base parameters and the generated parameters
        template_file_location = f"{cls.test_dir_path}/test_checkpoint_template.json"
        base_parameters_file_location = f"{cls.test_dir_path}/test_checkpoint_parameters.json"
        parameters_file_location = os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{cls.test_id}/test_checkpoint_parameters.json")

        with open(base_parameters_file_location, "r") as f:
            base_parameters = json.load(f)
        with open(parameters_file_location, "w") as fw:
            # Parameter name -> value for this run; resources are named after the test id
            overrides = {
                "location": cls.location,
                "networkInterfaceName": f"{cls.test_id}-nic",
                "networkSecurityGroupName": f"{cls.test_id}-nsg",
                "virtualNetworkName": f"{cls.test_id}-vnet",
                "publicIpAddressName": f"{cls.test_id}-pip",
                "virtualMachineName": f"{cls.test_id}-vm",
                "virtualMachineSize": "Standard_B2s",
                "adminUsername": cls.admin_username,
                "adminPublicKey": config_details["user/admin_public_key"],
                "storageAccountName": f"{cls.test_id}st",
            }
            parameters = base_parameters["parameters"]
            for parameter_name, parameter_value in overrides.items():
                parameters[parameter_name]["value"] = parameter_value
            json.dump(base_parameters, fw, indent=4)

        # Start ARM deployment, then give it a moment before querying the results
        # NOTE(review): keyword names template_file / parameters_file are kept as found;
        # confirm they match the AzureController.start_deployment signature in use.
        AzureController.start_deployment(
            resource_group=cls.resource_group,
            deployment_name=cls.test_id,
            template_file=template_file_location,
            parameters_file=parameters_file_location)
        cls._gracefully_wait(15)

        # Values that only exist after the deployment
        cls.conn_str = AzureController.get_connection_string(
            storage_account_name=f"{cls.test_id}st")
        vm_addresses = AzureController.list_ip_addresses(
            resource_group=cls.resource_group,
            vm_name=f"{cls.test_id}-vm")
        network = vm_addresses[0]["virtualMachine"]["network"]
        cls.ip_address = network["publicIpAddresses"][0]["ipAddress"]
Beispiel #26
0
    def _build_node_image(cluster_details: dict) -> None:
        """Build the Azure Image used for MARO Nodes.

        A temporary VM "<id>-build-node-image-vm" is deployed, initialized by the
        init_build_node_image_vm.py script over SSH, then deallocated, generalized
        and captured as the image "<id>-node-image". The resources of the temporary
        VM are deleted afterwards.
        See https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image for reference.

        Args:
            cluster_details (dict): details of the MARO Cluster.

        Returns:
            None.
        """
        logger.info("Building MARO Node image")

        # Values shared by the steps below
        cluster_id = cluster_details["id"]
        resource_group = cluster_details["cloud"]["resource_group"]
        resource_name = "build-node-image"
        image_name = f"{cluster_id}-node-image"
        vm_name = f"{cluster_id}-{resource_name}-vm"

        # Deploy the temporary VM.
        # For simplicity, the master node_size is reused as the size of this VM
        arm_template = f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_build_node_image_vm/template.json"
        arm_parameters = (
            f"{GlobalPaths.ABS_MARO_CLUSTERS}/{cluster_details['name']}"
            f"/build_node_image_vm/arm_create_build_node_image_vm_parameters.json"
        )
        ArmTemplateParameterBuilder.create_build_node_image_vm(
            cluster_details=cluster_details,
            node_size=cluster_details["master"]["node_size"],
            export_path=arm_parameters,
        )
        AzureController.start_deployment(
            resource_group=resource_group,
            deployment_name=resource_name,
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )
        # Gracefully wait
        time.sleep(10)

        # The first public address of the VM is the SSH target
        vm_addresses = AzureController.list_ip_addresses(
            resource_group=resource_group,
            vm_name=vm_name,
        )
        public_ip_address = vm_addresses[0]["virtualMachine"]["network"]["publicIpAddresses"][0]["ipAddress"]
        ssh_target = {
            "node_username": cluster_details["cloud"]["default_username"],
            "node_hostname": public_ip_address,
            "node_ssh_port": cluster_details["connection"]["ssh"]["port"],
        }

        # Wait until the VM accepts connections, then upload and run the init script
        GrassAzureExecutor.retry_connection(**ssh_target)
        FileSynchronizer.copy_files_to_node(
            local_path=f"{GrassPaths.MARO_GRASS_LIB}/scripts/build_node_image_vm/init_build_node_image_vm.py",
            remote_dir="~/",
            **ssh_target,
        )
        GrassAzureExecutor.remote_init_build_node_image_vm(**ssh_target)

        # Capture the image: deallocate, generalize, then create the image from the VM
        AzureController.deallocate_vm(resource_group=resource_group, vm_name=vm_name)
        AzureController.generalize_vm(resource_group=resource_group, vm_name=vm_name)
        AzureController.create_image_from_vm(
            resource_group=resource_group,
            image_name=image_name,
            vm_name=vm_name,
        )

        # The temporary VM and its resources are no longer needed
        GrassAzureExecutor._delete_resources(
            resource_group=resource_group,
            resource_name=resource_name,
            cluster_id=cluster_id,
        )

        logger.info_green("MARO Node Image is built")
Beispiel #27
0
    def _create_vm(self, node_name: str, node_size: str) -> dict:
        """Deploy the VM of a MARO Node and build its join_cluster_deployment.

        The ARM parameters and the resulting join_cluster_deployment.yml are both
        stored in the node's folder under the local cluster directory. The VM is
        named "<cluster_id>-<node_name>-vm"; that name is used as resource name and
        hostname as well.

        Args:
            node_name (str): name of the MARO Node. Also the id of the MARO Node.
            node_size (str): size of the MARO Node VM.

        Returns:
            dict: join_cluster_deployment that needed in "join cluster" operation.
                See /lib/scripts/join_cluster.py for reference.
        """
        logger.info(message=f"Creating VM '{node_name}'")

        node_dir = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{self.cluster_name}/nodes/{node_name}"
        vm_name = f"{self.cluster_id}-{node_name}-vm"

        # Export the ARM parameters into the node folder and run the deployment
        os.makedirs(name=node_dir, exist_ok=True)
        arm_template = f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_node/template.json"
        arm_parameters = f"{node_dir}/arm_create_node_parameters.json"
        ArmTemplateParameterBuilder.create_node(
            node_name=node_name,
            cluster_details=self.cluster_details,
            node_size=node_size,
            export_path=arm_parameters,
        )
        AzureController.start_deployment(
            resource_group=self.resource_group,
            deployment_name=node_name,
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )

        # Look up the addresses assigned to the new VM
        vm_addresses = AzureController.list_ip_addresses(
            resource_group=self.resource_group,
            vm_name=vm_name,
        )

        logger.info_green(f"VM '{node_name}' is created")

        # Assemble the deployment section by section
        master_section = {
            "private_ip_address": self.master_private_ip_address,
            "api_server": {"port": self.master_api_server_port},
            "redis": {"port": self.master_redis_port},
        }
        # Only the first public / private address of the first entry is used
        network = vm_addresses[0]["virtualMachine"]["network"]
        node_section = {
            "name": node_name,
            "id": node_name,
            "username": self.default_username,
            "public_ip_address": network["publicIpAddresses"][0]["ipAddress"],
            "private_ip_address": network["privateIpAddresses"][0],
            "node_size": node_size,
            "resource_name": vm_name,
            "hostname": vm_name,
            "resources": {"cpu": "all", "memory": "all", "gpu": "all"},
            "api_server": {"port": self.api_server_port},
            "ssh": {"port": self.ssh_port},
        }
        join_cluster_deployment = {
            "mode": "grass/azure",
            "master": master_section,
            "node": node_section,
            "configs": {
                "install_node_runtime": False,
                "install_node_gpu_support": False,
            },
        }

        # Keep a copy of the deployment next to the node's ARM parameters
        with open(file=f"{node_dir}/join_cluster_deployment.yml", mode="w") as fw:
            yaml.safe_dump(data=join_cluster_deployment, stream=fw)

        return join_cluster_deployment
Beispiel #28
0
 def tearDownClass(cls) -> None:
     """Delete the resource group used by the test class once the tests are done."""
     used_resource_group = cls.resource_group
     AzureController.delete_resource_group(used_resource_group)