Beispiel #1
0
    def _create_k8s_cluster(cluster_details: dict) -> None:
        """Deploy the AKS cluster of the MARO Cluster and attach its ACR.

        The ARM parameters file is exported first, the "aks_cluster"
        deployment is started with it, and the ACR is attached afterwards.

        Args:
            cluster_details (dict): details of the MARO Cluster.

        Returns:
            None.
        """
        logger.info("Creating k8s cluster")

        # Export the ARM parameters, then deploy the template with them.
        arm_template = f"{K8sPaths.ABS_MARO_K8S_LIB}/modes/aks/create_aks_cluster/template.json"
        arm_parameters = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{cluster_details['name']}/parameters/create_aks_cluster.json"
        ArmTemplateParameterBuilder.create_aks_cluster(
            cluster_details=cluster_details,
            export_path=arm_parameters,
        )
        resource_group = cluster_details["cloud"]["resource_group"]
        AzureController.start_deployment(
            resource_group=resource_group,
            deployment_name="aks_cluster",
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )

        # Both resource names are derived from the cluster id.
        cluster_id = cluster_details["id"]
        AzureController.attach_acr(
            resource_group=resource_group,
            aks_name=f"{cluster_id}-aks",
            acr_name=f"{cluster_id}acr",
        )

        logger.info_green("K8s cluster is created")
Beispiel #2
0
    def _create_vnet(cluster_details: dict) -> None:
        """Deploy the virtual network of the MARO Cluster.

        Exports the ARM parameters file under the cluster's "vnet" folder and
        starts the "vnet" deployment in the cluster's resource group.

        Args:
            cluster_details (dict): details of the MARO Cluster.

        Returns:
            None.
        """
        logger.info("Creating vnet")

        # Export the ARM parameters, then deploy the template with them.
        arm_template = f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_vnet/template.json"
        arm_parameters = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{cluster_details['name']}/vnet/arm_create_vnet_parameters.json"
        ArmTemplateParameterBuilder.create_vnet(
            cluster_details=cluster_details,
            export_path=arm_parameters,
        )
        deployment_kwargs = {
            "resource_group": cluster_details["cloud"]["resource_group"],
            "deployment_name": "vnet",
            "template_file_path": arm_template,
            "parameters_file_path": arm_parameters,
        }
        AzureController.start_deployment(**deployment_kwargs)

        logger.info_green("Vnet is created")
Beispiel #3
0
    def _create_master_vm(cluster_details: dict) -> None:
        """Deploy the MARO Master VM and record how to reach it.

        After the "master" ARM deployment, the VM's IP addresses are queried
        and cluster_details["master"] is filled in place with hostname,
        username, public / private IP, resource name and the ssh and
        api_server ports taken from cluster_details["connection"].

        Args:
            cluster_details (dict): details of the MARO Cluster.

        Returns:
            None.
        """
        logger.info("Creating Master VM")

        # The VM name doubles as hostname and resource name below.
        master_vm = f"{cluster_details['id']}-master-vm"

        # Export the ARM parameters, then deploy the template with them.
        arm_template = f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_master/template.json"
        arm_parameters = (
            f"{GlobalPaths.ABS_MARO_CLUSTERS}/{cluster_details['name']}"
            f"/master/arm_create_master_parameters.json"
        )
        ArmTemplateParameterBuilder.create_master(
            cluster_details=cluster_details,
            node_size=cluster_details["master"]["node_size"],
            export_path=arm_parameters,
        )
        resource_group = cluster_details["cloud"]["resource_group"]
        AzureController.start_deployment(
            resource_group=resource_group,
            deployment_name="master",
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )

        # Only the first entry / first address of each kind is used.
        vm_records = AzureController.list_ip_addresses(
            resource_group=resource_group,
            vm_name=master_vm,
        )
        network = vm_records[0]["virtualMachine"]["network"]
        public_ip_address = network["publicIpAddresses"][0]["ipAddress"]
        private_ip_address = network["privateIpAddresses"][0]

        # Fill the master section of cluster_details in place.
        username = cluster_details["cloud"]["default_username"]
        master_details = cluster_details["master"]
        master_details["hostname"] = master_vm
        master_details["username"] = username
        master_details["public_ip_address"] = public_ip_address
        master_details["private_ip_address"] = private_ip_address
        master_details["resource_name"] = master_vm
        master_details["ssh"] = {"port": cluster_details["connection"]["ssh"]["port"]}
        master_details["api_server"] = {"port": cluster_details["connection"]["api_server"]["port"]}

        logger.info_green(f"You can login to your master node with: {username}@{public_ip_address}")
        logger.info_green("Master VM is created")
Beispiel #4
0
 def _create_virtual_machines(cls, test_config: dict):
     """Run the "cluster" ARM deployment in the class's resource group.

     Exports the ARM parameters through cls.build_arm_parameters, selects
     the Azure subscription, creates the resource group and finally starts
     the deployment from the class's template and exported parameters.

     Args:
         test_config (dict): test settings; "cloud/default_public_key" and
             "cloud/subscription" are read from it.
     """
     # Export the ARM parameters file.
     arm_build_config = {
         "location": cls.location,
         "default_username": cls.default_username,
         "default_public_key": test_config["cloud/default_public_key"],
         "ssh": {"port": GlobalParams.DEFAULT_SSH_PORT},
         "api_server": {"port": GrassParams.DEFAULT_API_SERVER_PORT},
     }
     cls.build_arm_parameters(
         build_config=arm_build_config,
         export_path=cls.arm_parameters_file_export_path,
     )

     # Select the subscription and make sure the resource group exists.
     AzureController.set_subscription(subscription=test_config["cloud/subscription"])
     AzureController.create_resource_group(
         resource_group=cls.resource_group,
         location=cls.location,
     )

     # Deploy the template with the exported parameters.
     AzureController.start_deployment(
         resource_group=cls.resource_group,
         deployment_name="cluster",
         template_file_path=cls.arm_template_file_path,
         parameters_file_path=cls.arm_parameters_file_export_path,
     )
Beispiel #5
0
    def setUpClass(cls) -> None:
        """Provision the Azure resources shared by the tests of this class.

        Steps, in order: create a per-run working directory, load and
        validate ``config.yml`` located next to this test file, create the
        resource group, write the ARM parameters file, start the ARM
        deployment, then store the storage connection string and the VM's
        public IP address on the class.

        Raises:
            Exception: if ``config.yml`` is not a mapping or has no truthy
                "cloud/subscription" / "user/admin_public_key" entry.
        """
        # Get and set params
        GlobalParams.LOG_LEVEL = logging.DEBUG
        cls.test_id = uuid.uuid4().hex[:8]
        os.makedirs(
            os.path.expanduser(f"{GlobalPaths.MARO_TEST}/{cls.test_id}"),
            exist_ok=True)
        cls.test_file_path = os.path.abspath(__file__)
        cls.test_dir_path = os.path.dirname(cls.test_file_path)

        # Load config
        cls.config_path = os.path.normpath(
            os.path.join(cls.test_dir_path, "./config.yml"))
        with open(cls.config_path) as fr:
            config_details = yaml.safe_load(fr)

        # Validate before touching Azure. An empty file (safe_load -> None),
        # a non-mapping document or a missing key all report the same
        # "Invalid config" error instead of a bare TypeError / KeyError.
        required_keys = ("cloud/subscription", "user/admin_public_key")
        if not isinstance(config_details, dict) or not all(
                config_details.get(key) for key in required_keys):
            raise Exception("Invalid config")

        # Create resource group
        AzureController.create_resource_group(cls.resource_group, cls.location)

        # Create ARM params
        vm_name = f"{cls.test_id}-vm"
        storage_account_name = f"{cls.test_id}st"
        template_file_location = f"{cls.test_dir_path}/test_checkpoint_template.json"
        base_parameters_file_location = f"{cls.test_dir_path}/test_checkpoint_parameters.json"
        parameters_file_location = os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{cls.test_id}/test_checkpoint_parameters.json"
        )
        with open(base_parameters_file_location, "r") as f:
            base_parameters = json.load(f)

        # Patch the template values first so that a malformed base file fails
        # before the output file is created (no truncated file left behind).
        parameters = base_parameters["parameters"]
        parameters["location"]["value"] = cls.location
        parameters["networkInterfaceName"]["value"] = f"{cls.test_id}-nic"
        parameters["networkSecurityGroupName"]["value"] = f"{cls.test_id}-nsg"
        parameters["virtualNetworkName"]["value"] = f"{cls.test_id}-vnet"
        parameters["publicIpAddressName"]["value"] = f"{cls.test_id}-pip"
        parameters["virtualMachineName"]["value"] = vm_name
        parameters["virtualMachineSize"]["value"] = "Standard_B2s"
        parameters["adminUsername"]["value"] = cls.admin_username
        parameters["adminPublicKey"]["value"] = config_details["user/admin_public_key"]
        parameters["storageAccountName"]["value"] = storage_account_name
        with open(parameters_file_location, "w") as fw:
            json.dump(base_parameters, fw, indent=4)

        # Start ARM deployment
        # NOTE(review): keyword names are kept as found; confirm they match
        # the signature of AzureController.start_deployment in this version.
        AzureController.start_deployment(
            resource_group=cls.resource_group,
            deployment_name=cls.test_id,
            template_file=template_file_location,
            parameters_file=parameters_file_location)
        cls._gracefully_wait(15)

        # Get params after ARM deployment
        cls.conn_str = AzureController.get_connection_string(
            storage_account_name=storage_account_name)
        ip_addresses = AzureController.list_ip_addresses(
            resource_group=cls.resource_group, vm_name=vm_name)
        cls.ip_address = ip_addresses[0]["virtualMachine"]["network"][
            "publicIpAddresses"][0]["ipAddress"]
Beispiel #6
0
    def _create_vm(self, node_name: str, node_size: str) -> dict:
        """Deploy the VM of a MARO Node and describe how it joins the cluster.

        The node's folder is created under the cluster directory, the ARM
        deployment named after the node is started, the VM's IP addresses are
        queried, and the resulting join_cluster_deployment is dumped to
        ``join_cluster_deployment.yml`` in the node's folder.

        Args:
            node_name (str): name of the MARO Node. Also the id of the MARO Node.
            node_size (str): size of the MARO Node VM.

        Returns:
            dict: join_cluster_deployment that needed in "join cluster" operation.
                See /lib/scripts/join_cluster.py for reference.
        """
        logger.info(message=f"Creating VM '{node_name}'")

        # Everything belonging to this node lives in one folder.
        node_dir = f"{GlobalPaths.ABS_MARO_CLUSTERS}/{self.cluster_name}/nodes/{node_name}"
        os.makedirs(name=node_dir, exist_ok=True)

        # Export the ARM parameters, then deploy the template with them.
        arm_template = f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_node/template.json"
        arm_parameters = f"{node_dir}/arm_create_node_parameters.json"
        ArmTemplateParameterBuilder.create_node(
            node_name=node_name,
            cluster_details=self.cluster_details,
            node_size=node_size,
            export_path=arm_parameters,
        )
        AzureController.start_deployment(
            resource_group=self.resource_group,
            deployment_name=node_name,
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )

        # The VM name is reused as resource name and hostname of the node.
        vm_resource_name = f"{self.cluster_id}-{node_name}-vm"
        vm_records = AzureController.list_ip_addresses(
            resource_group=self.resource_group,
            vm_name=vm_resource_name,
        )

        logger.info_green(f"VM '{node_name}' is created")

        # Build join_cluster_deployment section by section.
        master_section = {
            "private_ip_address": self.master_private_ip_address,
            "api_server": {"port": self.master_api_server_port},
            "redis": {"port": self.master_redis_port},
        }
        node_username = self.default_username
        # Only the first entry / first address of each kind is used.
        network = vm_records[0]["virtualMachine"]["network"]
        node_section = {
            "name": node_name,
            "id": node_name,
            "username": node_username,
            "public_ip_address": network["publicIpAddresses"][0]["ipAddress"],
            "private_ip_address": network["privateIpAddresses"][0],
            "node_size": node_size,
            "resource_name": vm_resource_name,
            "hostname": vm_resource_name,
            "resources": {"cpu": "all", "memory": "all", "gpu": "all"},
            "api_server": {"port": self.api_server_port},
            "ssh": {"port": self.ssh_port},
        }
        join_cluster_deployment = {
            "mode": "grass/azure",
            "master": master_section,
            "node": node_section,
            "configs": {
                "install_node_runtime": False,
                "install_node_gpu_support": False,
            },
        }

        # Persist the deployment next to the node's ARM parameters.
        with open(file=f"{node_dir}/join_cluster_deployment.yml", mode="w") as fw:
            yaml.safe_dump(data=join_cluster_deployment, stream=fw)

        return join_cluster_deployment
Beispiel #7
0
    def _build_node_image(cluster_details: dict) -> None:
        """Build Azure Image for MARO Node.

        A temporary VM is deployed and initialized over SSH, then it is
        deallocated, generalized and captured as "<cluster id>-node-image";
        the temporary resources are deleted afterwards.
        See https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image for reference.

        Args:
            cluster_details (dict): details of the MARO Cluster.

        Returns:
            None.
        """
        logger.info("Building MARO Node image")

        # Names derived from the cluster id.
        cluster_id = cluster_details["id"]
        resource_name = "build-node-image"
        image_name = f"{cluster_id}-node-image"
        builder_vm = f"{cluster_id}-{resource_name}-vm"

        # Export the ARM parameters, then deploy the template with them.
        # For simplicity, the master node size is used for the temporary VM.
        arm_template = f"{GrassPaths.ABS_MARO_GRASS_LIB}/modes/azure/create_build_node_image_vm/template.json"
        arm_parameters = (
            f"{GlobalPaths.ABS_MARO_CLUSTERS}/{cluster_details['name']}"
            f"/build_node_image_vm/arm_create_build_node_image_vm_parameters.json"
        )
        ArmTemplateParameterBuilder.create_build_node_image_vm(
            cluster_details=cluster_details,
            node_size=cluster_details["master"]["node_size"],
            export_path=arm_parameters,
        )
        resource_group = cluster_details["cloud"]["resource_group"]
        AzureController.start_deployment(
            resource_group=resource_group,
            deployment_name=resource_name,
            template_file_path=arm_template,
            parameters_file_path=arm_parameters,
        )
        # Gracefully wait
        time.sleep(10)

        # Get public ip address (first entry / first address is used).
        vm_records = AzureController.list_ip_addresses(
            resource_group=resource_group,
            vm_name=builder_vm,
        )
        public_ip_address = vm_records[0]["virtualMachine"]["network"]["publicIpAddresses"][0]["ipAddress"]

        # The same SSH target is used by every remote step below.
        ssh_target = {
            "node_username": cluster_details["cloud"]["default_username"],
            "node_hostname": public_ip_address,
            "node_ssh_port": cluster_details["connection"]["ssh"]["port"],
        }

        # Make sure the temporary VM is able to connect
        GrassAzureExecutor.retry_connection(**ssh_target)

        # Copy the init script to the VM and run it
        init_script = f"{GrassPaths.MARO_GRASS_LIB}/scripts/build_node_image_vm/init_build_node_image_vm.py"
        FileSynchronizer.copy_files_to_node(
            local_path=init_script,
            remote_dir="~/",
            **ssh_target,
        )
        GrassAzureExecutor.remote_init_build_node_image_vm(**ssh_target)

        # Extract image: deallocate -> generalize -> capture
        AzureController.deallocate_vm(resource_group=resource_group, vm_name=builder_vm)
        AzureController.generalize_vm(resource_group=resource_group, vm_name=builder_vm)
        AzureController.create_image_from_vm(
            resource_group=resource_group,
            image_name=image_name,
            vm_name=builder_vm,
        )

        # Delete the temporary resources
        GrassAzureExecutor._delete_resources(
            resource_group=resource_group,
            resource_name=resource_name,
            cluster_id=cluster_id,
        )

        logger.info_green("MARO Node Image is built")