def _create_master(self):
    """Deploy the master VM and record its network details.

    Steps performed, in order:

    1. Export the ARM parameters for the ``master`` node.
    2. Start the ARM deployment named ``master`` with the default node
       template.
    3. Query the IP addresses of the ``<cluster_id>-master-vm`` VM.
    4. Store public/private IP, hostname and resource name in
       ``self.cluster_details['master']``.
    5. Save the cluster details with ``sync=False``.
    """
    logger.info("Creating master VM")

    # Load details
    details = self.cluster_details
    master_section = details['master']
    cluster_id = details['id']
    resource_group = details['cloud']['resource_group']
    admin_username = details['user']['admin_username']
    node_size = details['master']['node_size']

    # The master VM name doubles as hostname and resource name.
    master_vm_name = f"{cluster_id}-master-vm"
    parameters_dir = f"{GlobalPaths.MARO_CLUSTERS}/{self.cluster_name}/parameters"

    # Create ARM parameters
    self._create_deployment_parameters(
        node_name='master',
        cluster_details=details,
        node_size=node_size,
        export_dir=os.path.expanduser(parameters_dir),
    )

    # Start deployment
    # NOTE(review): the parameters file path is passed without expanduser,
    # unlike export_dir above -- confirm start_deployment copes with '~'.
    AzureExecutor.start_deployment(
        resource_group=resource_group,
        deployment_name='master',
        template_file=f"{GlobalPaths.MARO_GRASS_LIB}/azure/grass-create-default-node-template.json",
        parameters_file=f"{parameters_dir}/master.json",
    )

    # Get master IP addresses (only the first entry of each list is used)
    network = AzureExecutor.list_ip_addresses(
        resource_group=resource_group,
        vm_name=master_vm_name,
    )[0]["virtualMachine"]["network"]
    public_ip_address = network['publicIpAddresses'][0]['ipAddress']
    private_ip_address = network['privateIpAddresses'][0]

    master_section.update({
        'public_ip_address': public_ip_address,
        'private_ip_address': private_ip_address,
        'hostname': master_vm_name,
        'resource_name': master_vm_name,
    })
    logger.info_green(f"You can login to your master node with: ssh {admin_username}@{public_ip_address}")

    # Save details
    save_cluster_details(
        cluster_name=self.cluster_name,
        cluster_details=details,
        sync=False,
    )

    logger.info_green("Master VM is created")
def _create_vm(self, node_name: str, node_size: str, node_size_to_spec: dict):
    """Deploy one node VM and register its details remotely.

    Args:
        node_name: Name of the node; used as the deployment name, in the
            parameters file name and in the VM name
            ``<cluster_id>-<node_name>-vm``.
        node_size: VM size passed to the SKU lookup and to the ARM
            parameters; also the key into ``node_size_to_spec``.
        node_size_to_spec: Mapping from VM size to a spec dict providing
            ``numberOfCores`` and ``memoryInMb``.

    The GPU template is used when the SKU reports a positive ``GPUs``
    capability; otherwise the default node template is used.
    """
    logger.info(message=f"Creating VM {node_name}")

    # Load details
    details = self.cluster_details
    location = details['cloud']['location']
    cluster_id = details['id']
    resource_group = details['cloud']['resource_group']

    parameters_dir = f"{GlobalPaths.MARO_CLUSTERS}/{self.cluster_name}/parameters"

    # Create ARM parameters
    GrassAzureExecutor._create_deployment_parameters(
        node_name=node_name,
        cluster_details=details,
        node_size=node_size,
        export_dir=os.path.expanduser(parameters_dir),
    )

    # Get sku and check gpu nums: take the first "GPUs" capability, else 0.
    sku = AzureExecutor.get_sku(vm_size=node_size, location=location)
    gpu_nums = 0
    if sku is not None:
        gpu_nums = next(
            (int(cap["value"]) for cap in sku["capabilities"] if cap["name"] == "GPUs"),
            0,
        )

    # Start deployment
    template_file_location = (
        f"{GlobalPaths.MARO_GRASS_LIB}/azure/grass-create-gpu-node-template.json"
        if gpu_nums > 0
        else f"{GlobalPaths.MARO_GRASS_LIB}/azure/grass-create-default-node-template.json"
    )
    AzureExecutor.start_deployment(
        resource_group=resource_group,
        deployment_name=node_name,
        template_file=template_file_location,
        parameters_file=f"{parameters_dir}/{node_name}.json",
    )

    # Get node IP addresses (only the first entry of each list is used)
    vm_name = f"{cluster_id}-{node_name}-vm"
    network = AzureExecutor.list_ip_addresses(
        resource_group=resource_group,
        vm_name=vm_name,
    )[0]["virtualMachine"]["network"]
    public_ip = network['publicIpAddresses'][0]['ipAddress']
    private_ip = network['privateIpAddresses'][0]
    spec = node_size_to_spec[node_size]

    # Save details
    self.grass_executor.remote_set_node_details(
        node_name=node_name,
        node_details={
            'public_ip_address': public_ip,
            'private_ip_address': private_ip,
            'node_size': node_size,
            'resource_name': vm_name,
            'hostname': vm_name,
            'resources': {
                'cpu': spec['numberOfCores'],
                'memory': spec['memoryInMb'],
                'gpu': gpu_nums,
            },
        },
    )

    logger.info_green(f"VM {node_name} is created")
def setUpClass(cls) -> None:
    """Provision the Azure resources shared by the tests of this class.

    Generates an 8-character test id, loads ``config.yml`` from the test
    directory, creates the resource group, writes a per-test ARM parameters
    file, starts the ARM deployment, waits, and finally stores the storage
    connection string and the VM's public IP address on the class.

    Raises:
        Exception: ``"Invalid config"`` when ``cloud/subscription`` or
            ``user/admin_public_key`` is falsy in the loaded config.
    """
    # Get and set params
    GlobalParams.LOG_LEVEL = logging.DEBUG
    cls.test_id = uuid.uuid4().hex[:8]
    test_root = f"{GlobalPaths.MARO_TEST}/{cls.test_id}"
    os.makedirs(os.path.expanduser(test_root), exist_ok=True)
    cls.test_file_path = os.path.abspath(__file__)
    cls.test_dir_path = os.path.dirname(cls.test_file_path)

    # Load config
    cls.config_path = os.path.normpath(os.path.join(cls.test_dir_path, "./config.yml"))
    with open(cls.config_path) as config_file:
        config_details = yaml.safe_load(config_file)
    if not (config_details["cloud/subscription"] and config_details["user/admin_public_key"]):
        raise Exception("Invalid config")

    # Create resource group
    AzureExecutor.create_resource_group(cls.resource_group, cls.location)

    # Create ARM params
    template_file_location = f"{cls.test_dir_path}/test_checkpoint_template.json"
    base_parameters_file_location = f"{cls.test_dir_path}/test_checkpoint_parameters.json"
    parameters_file_location = os.path.expanduser(f"{test_root}/test_checkpoint_parameters.json")
    with open(base_parameters_file_location, "r") as base_file:
        base_parameters = json.load(base_file)
    with open(parameters_file_location, "w") as out_file:
        # Override the "value" entry of each ARM parameter, then dump the
        # whole (mutated) base document.
        overrides = {
            "location": cls.location,
            "networkInterfaceName": f"{cls.test_id}-nic",
            "networkSecurityGroupName": f"{cls.test_id}-nsg",
            "virtualNetworkName": f"{cls.test_id}-vnet",
            "publicIpAddressName": f"{cls.test_id}-pip",
            "virtualMachineName": f"{cls.test_id}-vm",
            "virtualMachineSize": "Standard_B2s",
            "adminUsername": cls.admin_username,
            "adminPublicKey": config_details["user/admin_public_key"],
            "storageAccountName": f"{cls.test_id}st",
        }
        arm_parameters = base_parameters["parameters"]
        for parameter_name, parameter_value in overrides.items():
            arm_parameters[parameter_name]["value"] = parameter_value
        json.dump(base_parameters, out_file, indent=4)

    # Start ARM deployment
    AzureExecutor.start_deployment(
        resource_group=cls.resource_group,
        deployment_name=cls.test_id,
        template_file=template_file_location,
        parameters_file=parameters_file_location,
    )
    cls._gracefully_wait(15)

    # Get params after ARM deployment
    cls.conn_str = AzureExecutor.get_connection_string(storage_account_name=f"{cls.test_id}st")
    vm_network = AzureExecutor.list_ip_addresses(
        resource_group=cls.resource_group,
        vm_name=f"{cls.test_id}-vm",
    )[0]["virtualMachine"]["network"]
    cls.ip_address = vm_network["publicIpAddresses"][0]["ipAddress"]