Example #1
    def test20_train_env_provision(self):
        # Build the docker image to be loaded into the cluster.
        command = (
            f"docker build -f {self.maro_pkg_path}/docker_files/cpu.runtime.source.df -t maro_runtime_cpu:test "
            f"{self.maro_pkg_path}")
        Subprocess.run(command=command)

        # Run command.
        command = f"maro grass image push {self.cluster_name} --debug --image-name maro_runtime_cpu:test"
        Subprocess.interactive_run(command=command)

        # Check image status; fail if the desired state is not reached within 1000 seconds.
        is_loaded = False
        start_time = time.time()
        while not is_loaded and start_time + 1000 >= time.time():
            try:
                is_loaded = True
                nodes_details = self._list_nodes_details()
                for node_details in nodes_details:
                    self.assertIn("maro_runtime_cpu_test",
                                  node_details["image_files"])
            except AssertionError:
                is_loaded = False
                time.sleep(10)
        self.assertTrue(is_loaded)
Example #2
    def setUpClass(cls, file_path: str = os.path.abspath(__file__)) -> None:
        # Get and set params
        GlobalParams.LOG_LEVEL = logging.DEBUG
        cls.test_id = uuid.uuid4().hex[:8]
        os.makedirs(
            os.path.expanduser(f"{GlobalPaths.MARO_TEST}/{cls.test_id}"),
            exist_ok=True)
        os.makedirs(
            os.path.expanduser(f"{GlobalPaths.MARO_TEST}/{cls.test_id}/tar"),
            exist_ok=True)
        cls.file_path = os.path.abspath(__file__)
        cls.dir_path = os.path.dirname(cls.file_path)
        cls.deployment_template_path = os.path.normpath(
            os.path.join(cls.dir_path,
                         "../templates/test_k8s_azure_create.yml"))
        cls.deployment_path = os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{cls.test_id}/test_k8s_azure_create.yml")
        cls.config_path = os.path.normpath(
            os.path.join(cls.dir_path, "../config.yml"))

        # Load config and save deployment
        with open(cls.deployment_template_path) as fr:
            deployment_details = yaml.safe_load(fr)
        with open(cls.config_path) as fr:
            config_details = yaml.safe_load(fr)
            if config_details["cloud/subscription"] and config_details[
                    "user/admin_public_key"]:
                deployment_details["cloud"]["subscription"] = config_details[
                    "cloud/subscription"]
                deployment_details["user"][
                    "admin_public_key"] = config_details[
                        "user/admin_public_key"]
            else:
                raise Exception("Invalid config")
        with open(cls.deployment_path, "w") as fw:
            yaml.safe_dump(deployment_details, fw)

        # Get params from deployments
        cls.cluster_name = deployment_details["name"]

        # Init test files
        cls.local_big_file_path = os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{cls.test_id}/big_file")
        cls.local_small_files_path = os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{cls.test_id}/small_files")
        command = f"dd if=/dev/zero of={cls.local_big_file_path} bs=1 count=0 seek=1G"
        Subprocess.run(command=command)
        command = f"git clone [email protected]:microsoft/maro.git {cls.local_small_files_path}"
        Subprocess.run(command=command)

        # Create cluster
        command = f"maro k8s create --debug {cls.deployment_path}"
        Subprocess.interactive_run(command=command)
        cls.cluster_details = DetailsReader.load_cluster_details(
            cluster_name=cls.cluster_name)
        cls.cluster_id = cls.cluster_details["id"]
        cls.executor = K8sAksExecutor(cluster_name=cls.cluster_name)
        time.sleep(15)
        cls.pod_name = cls._get_redis_pod_name()
Example #3
    def test20_train_env_provision(self):
        # Build the docker image to be pushed to the cluster.
        command = (
            f"docker build -f {self.maro_pkg_path}/docker_files/cpu.runtime.source.df -t maro_runtime_cpu "
            f"{self.maro_pkg_path}")
        Subprocess.run(command=command)

        # Run command.
        command = f"maro k8s image push {self.cluster_name} --debug --image-name maro_runtime_cpu"
        Subprocess.interactive_run(command=command)
Example #4
    def get_resource_group(resource_group: str) -> dict:
        command = f"az group show --name {resource_group}"
        try:
            return_str = Subprocess.run(command=command)
            return json.loads(return_str)
        except CommandExecutionError:
            return {}
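A caller can treat the empty dict as "resource group does not exist". A minimal usage sketch, assuming the helper is exposed as a static method on AzureController like the other az wrappers in these examples (the resource group name is hypothetical):

    if not AzureController.get_resource_group(resource_group="test_maro_k8s_abc123"):
        print("Resource group not found; it has to be created first.")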
Example #5
    def pull_data(self, local_dir: str, remote_path: str) -> None:
        """Pull remote AFS service data to local folder via azcopy.

        Args:
            local_dir (str): path of the local folder.
            remote_path (str): path of the remote data.

        Returns:
            None.
        """
        # Get sas
        sas = self._check_and_get_account_sas()

        # Pull data
        abs_local_dir = os.path.expanduser(local_dir)
        source_path = PathConvertor.build_path_without_trailing_slash(
            remote_path)
        abs_target_dir = PathConvertor.build_path_with_trailing_slash(
            abs_local_dir)
        os.makedirs(abs_target_dir, exist_ok=True)
        if not source_path.startswith("/"):
            raise FileOperationError(
                f"Invalid remote path: {source_path}\nShould be started with '/'"
            )
        copy_command = (
            "azcopy copy "
            f"'https://{self.cluster_id}st.file.core.windows.net/{self.cluster_id}-fs{source_path}?{sas}' "
            f"'{abs_target_dir}' "
            "--recursive=True")
        _ = Subprocess.run(command=copy_command)
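pull_data appears to be the download counterpart of the "maro k8s data push" command used in Example #24. A minimal usage sketch, assuming it is a method of the K8sAksExecutor created in Example #2 (both paths are hypothetical; the remote path must start with '/'):

    executor = K8sAksExecutor(cluster_name="my_test_cluster")
    executor.pull_data(local_dir="~/maro_downloads", remote_path="/train/dqn")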
Example #6
    def get_static_info() -> dict:
        """Get static resource information about the local environment.

        Returns:
            dict: cpu count, total/free memory in MiB, and GPU count, names and memory.
        """
        static_info = {}
        static_info["cpu"] = psutil.cpu_count()

        memory = psutil.virtual_memory()
        static_info["total_memory"] = round(float(memory.total) / (1024**2), 2)
        static_info["memory"] = round(float(memory.free) / (1024**2), 2)

        gpu_static_command = "nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits"
        try:
            return_str = Subprocess.run(command=gpu_static_command)
            # Drop the trailing empty line so the per-GPU parsing below does not fail.
            gpus_info = [line for line in return_str.split(os.linesep) if line.strip()]
            static_info["gpu"] = len(gpus_info)  # (int) logical number
            static_info["gpu_name"] = []
            static_info["gpu_memory"] = []
            for info in gpus_info:
                name, total_memory = info.split(", ")
                static_info["gpu_name"].append(name)
                static_info["gpu_memory"].append(total_memory)
        except Exception:
            # No GPU or nvidia-smi not available.
            static_info["gpu"] = 0

        return static_info
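For illustration, the returned dict on a machine with two GPUs might look like the following (all numbers are made up; memory figures are in MiB, as computed above):

    static_info = {
        "cpu": 16,
        "total_memory": 32768.0,
        "memory": 20480.0,
        "gpu": 2,
        "gpu_name": ["Tesla V100-SXM2-16GB", "Tesla V100-SXM2-16GB"],
        "gpu_memory": ["16160", "16160"],  # as reported by nvidia-smi, in MiB
    }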
Example #7
    def scale_nodepool(resource_group: str, aks_name: str, nodepool_name: str,
                       node_count: int) -> None:
        command = (f"az aks nodepool scale "
                   f"-g {resource_group} "
                   f"--cluster-name {aks_name} "
                   f"--name {nodepool_name} "
                   f"--node-count {node_count}")
        _ = Subprocess.run(command=command)
Example #8
    def tearDownClass(cls) -> None:
        # Print result.
        print(json.dumps(cls.test_func_to_time, indent=4, sort_keys=True))

        # Delete resource group.
        AzureController.delete_resource_group(
            resource_group=cls.resource_group)

        # Delete tmp test folder.
        shutil.rmtree(f"{GlobalPaths.ABS_MARO_TEST}/{cls.test_id}")

        # Delete docker image.
        try:
            command = "docker rmi maro_runtime_cpu:test"
            Subprocess.run(command=command)
        except CommandExecutionError:
            pass
Example #9
    def add_nodepool(resource_group: str, aks_name: str, nodepool_name: str,
                     node_count: int, node_size: str) -> None:
        command = (f"az aks nodepool add "
                   f"-g {resource_group} "
                   f"--cluster-name {aks_name} "
                   f"--name {nodepool_name} "
                   f"--node-count {node_count} "
                   f"--node-vm-size {node_size}")
        _ = Subprocess.run(command=command)
Example #10
    def test_1_rsync_small_files_to_remote(self) -> None:
        command = (f"ssh -o StrictHostKeyChecking=no "
                   f"{self.admin_username}@{self.master_public_ip_address} "
                   f"'mkdir -p ~/test/{self.test_id}/test_1_rsync_small_files_to_remote'")
        _ = Subprocess.run(command=command)
        command = (f"rsync -e 'ssh -o StrictHostKeyChecking=no' -az -r "
                   f"{self.local_small_files_path} "
                   f"{self.admin_username}@{self.master_public_ip_address}:"
                   f"~/test/{self.test_id}/test_1_rsync_small_files_to_remote")
        Subprocess.interactive_run(command=command)
Example #11
    def test12_image(self) -> None:
        # Run command.
        command = f"maro k8s image push {self.cluster_name} --debug --image-name alpine:latest"
        Subprocess.interactive_run(command=command)

        # Check validity.
        command = f"maro k8s image list {self.cluster_name}"
        return_str = Subprocess.run(command=command)
        images = ast.literal_eval(return_str)
        self.assertIn("alpine", images)
Example #12
    def _get_redis_pod_name(cls) -> str:
        # Get pods details
        command = "kubectl get pods -o json"
        return_str = Subprocess.run(command=command)
        pods_details = json.loads(return_str)["items"]

        # Find the Redis pod and return its name
        for pod_details in pods_details:
            if pod_details["metadata"]["labels"]["app"] == "maro-redis":
                return pod_details["metadata"]["name"]
Example #13
    def test_2_tar_ssh_small_files_to_local(self) -> None:
        command = f"mkdir -p {GlobalPaths.MARO_TEST}/{self.test_id}/test_2_tar_ssh_small_files_to_local"
        _ = Subprocess.run(command=command)
        basename = os.path.basename(f"~/test/{self.test_id}/test_1_tar_ssh_small_files_to_remote")
        dirname = os.path.dirname(f"~/test/{self.test_id}/test_1_tar_ssh_small_files_to_remote")
        command = (f"ssh {self.admin_username}@{self.master_public_ip_address} 'tar cf - -C {dirname} {basename}' | "
                   f"tar xf - -C {GlobalPaths.MARO_TEST}/{self.test_id}/test_2_tar_ssh_small_files_to_local")
        Subprocess.interactive_run(command=command)
        self.assertTrue(os.path.exists(os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{self.test_id}/"
            f"test_2_tar_ssh_small_files_to_local/test_1_tar_ssh_small_files_to_remote/small_files/README.md")))
Example #14
    def test_2_rsync_small_files_to_local(self) -> None:
        command = f"mkdir -p {GlobalPaths.MARO_TEST}/{self.test_id}/test_2_rsync_small_files_to_local"
        _ = Subprocess.run(command=command)
        command = (f"rsync -e 'ssh -o StrictHostKeyChecking=no' -az -r "
                   f"{self.admin_username}@{self.master_public_ip_address}:"
                   f"~/test/{self.test_id}/test_1_rsync_small_files_to_remote "
                   f"{GlobalPaths.MARO_TEST}/{self.test_id}/test_2_rsync_small_files_to_local")
        Subprocess.interactive_run(command=command)
        self.assertTrue(os.path.exists(os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{self.test_id}/"
            f"test_2_rsync_small_files_to_local/test_1_rsync_small_files_to_remote/small_files/README.md")))
Example #15
    def push_image(self, image_name: str) -> None:
        """Push local image to the MARO Cluster.

        Args:
            image_name (str): name of the local image loaded in docker.

        Returns:
            None.
        """
        remote_image_name = f"{self.cluster_id}acr.azurecr.io/{image_name}"

        # ACR login
        AzureController.login_acr(acr_name=f"{self.cluster_id}acr")

        # Tag image
        command = f"docker tag {image_name} {remote_image_name}"
        _ = Subprocess.run(command=command)

        # Push image to ACR
        command = f"docker push {remote_image_name}"
        _ = Subprocess.run(command=command)
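push_image appears to be the executor-level counterpart of the "maro k8s image push" CLI command used in Example #3. A minimal usage sketch, assuming the method belongs to the K8sAksExecutor created in Example #2 (the cluster name is hypothetical; the image name matches the one built in Example #3):

    executor = K8sAksExecutor(cluster_name="my_test_cluster")
    executor.push_image(image_name="maro_runtime_cpu")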
Example #16
    def test_1_tar_ssh_small_files_to_remote(self) -> None:
        command = (f"ssh -o StrictHostKeyChecking=no "
                   f"{self.admin_username}@{self.master_public_ip_address} "
                   f"'mkdir -p ~/test/{self.test_id}/test_1_tar_ssh_small_files_to_remote'")
        _ = Subprocess.run(command=command)

        basename = os.path.basename(self.local_small_files_path)
        dirname = os.path.dirname(self.local_small_files_path)
        command = (f"tar cf - -C {dirname} {basename} | "
                   f"ssh {self.admin_username}@{self.master_public_ip_address} "
                   f"'tar xf - -C ~/test/{self.test_id}/test_1_tar_ssh_small_files_to_remote'")
        Subprocess.interactive_run(command=command)
Example #17
    def __init__(self):
        self._redis_connection = redis.Redis(
            host="localhost", port=LocalParams.RESOURCE_REDIS_PORT)
        try:
            self._redis_connection.ping()
        except Exception:
            start_redis_command = f"redis-server --port {str(LocalParams.RESOURCE_REDIS_PORT)} --daemonize yes"
            _ = Subprocess.run(start_redis_command)

            # Start Resource Agents
            start_agent_command = f"python {LocalPaths.MARO_RESOURCE_AGENT}"
            _ = subprocess.Popen(start_agent_command, shell=True)
Example #18
    def template(export_path: str) -> None:
        """Export deployment template of grass mode.

        Args:
            export_path (str): location to export the templates.

        Returns:
            None.
        """
        # Get templates
        command = f"cp {GrassPaths.MARO_GRASS_LIB}/deployments/external/* {export_path}"
        _ = Subprocess.run(command=command)
Example #19
    def get_connection_string(storage_account_name: str) -> str:
        """Get the connection string for a storage account.

        Args:
            storage_account_name: The storage account name.

        Returns:
            str: Connection string.
        """
        command = f"az storage account show-connection-string --name {storage_account_name}"
        return_str = Subprocess.run(command=command)
        return json.loads(return_str)["connectionString"]
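A minimal usage sketch, assuming the helper is a static method on AzureController (the account name is hypothetical). Exporting the result as AZURE_STORAGE_CONNECTION_STRING lets later az storage calls in the same process authenticate without extra flags:

    import os

    connection_string = AzureController.get_connection_string(storage_account_name="abcdef123st")
    os.environ["AZURE_STORAGE_CONNECTION_STRING"] = connection_string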
Example #20
    def start_deployment(resource_group: str, deployment_name: str,
                         template_file_path: str,
                         parameters_file_path: str) -> None:
        command = (
            f"az deployment group create -g {resource_group} --name {deployment_name} "
            f"--template-file {template_file_path} --parameters {parameters_file_path}"
        )
        try:
            _ = Subprocess.run(command=command)
        except CommandExecutionError as e:
            error = json.loads(AzureController._get_valid_json(
                e.get_message()))["error"]
            raise DeploymentError(error["message"])
Example #21
    def copy_files_to_node(local_path: str, remote_dir: str,
                           node_username: str, node_hostname: str,
                           node_ssh_port: int) -> None:
        """Copy local files to the node, creating the target folder if it does not exist.

        Args:
            local_path (str): path of the local file.
            remote_dir (str): dir for remote files.
            node_username (str): username of the vm.
            node_hostname (str): hostname of the vm.
            node_ssh_port (int): port of the ssh connection.
        """
        source_path = PathConvertor.build_path_without_trailing_slash(
            local_path)
        basename = os.path.basename(source_path)
        folder_name = os.path.expanduser(os.path.dirname(source_path))
        target_dir = PathConvertor.build_path_with_trailing_slash(remote_dir)

        mkdir_script = (
            f"ssh -o StrictHostKeyChecking=no -p {node_ssh_port} {node_username}@{node_hostname} "
            f"'mkdir -p {target_dir}'")
        _ = Subprocess.run(command=mkdir_script)

        if platform.system() in ["Linux", "Darwin"]:
            # Copy with pipe
            copy_script = (
                f"tar czf - -C {folder_name} {basename} | "
                f"ssh -o StrictHostKeyChecking=no -p {node_ssh_port} {node_username}@{node_hostname} "
                f"'tar xzf - -C {target_dir}'")
            _ = Subprocess.run(command=copy_script)
        else:
            # Copy with tmp file
            tmp_file_name = uuid.uuid4()
            maro_local_tmp_abs_path = os.path.expanduser(
                GlobalPaths.MARO_LOCAL_TMP)

            tar_script = f"tar czf {maro_local_tmp_abs_path}/{tmp_file_name} -C {folder_name} {basename}"
            _ = Subprocess.run(command=tar_script)
            copy_script = (
                f"scp {maro_local_tmp_abs_path}/{tmp_file_name} "
                f"{node_username}@{node_hostname}:{GlobalPaths.MARO_LOCAL_TMP}"
            )
            _ = Subprocess.run(command=copy_script)
            untar_script = (
                f"ssh -o StrictHostKeyChecking=no -p {node_ssh_port} {node_username}@{node_hostname} "
                f"'tar xzf {GlobalPaths.MARO_LOCAL_TMP}/{tmp_file_name} -C {target_dir}'"
            )
            _ = Subprocess.run(untar_script)
            remove_script = f"rm {maro_local_tmp_abs_path}/{tmp_file_name}"
            _ = Subprocess.run(remove_script)
            remote_remove_script = (
                f"ssh -o StrictHostKeyChecking=no -p {node_ssh_port} {node_username}@{node_hostname} "
                f"'rm {GlobalPaths.MARO_LOCAL_TMP}/{tmp_file_name}'")
            _ = Subprocess.run(command=remote_remove_script)
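A minimal usage sketch with hypothetical connection details; the helper picks the pipe-based or tmp-file-based copy path depending on the local platform:

    copy_files_to_node(
        local_path="~/my_project/config.yml",
        remote_dir="~/uploads",
        node_username="admin",
        node_hostname="10.0.0.4",
        node_ssh_port=22,
    )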
Example #22
    def get_storage_account_sas(
        account_name: str,
        services: str = "bqtf",
        resource_types: str = "sco",
        permissions: str = "rwdlacup",
        expiry: str = (datetime.datetime.utcnow() + datetime.timedelta(
            days=365)).strftime("%Y-%m-%dT%H:%M:%S") + "Z"
    ) -> str:
        command = (
            f"az storage account generate-sas --account-name {account_name} --services {services} "
            f"--resource-types {resource_types} --permissions {permissions} --expiry {expiry}"
        )
        sas_str = Subprocess.run(command=command).strip("\n").replace('"', "")
        logger.debug(sas_str)
        return sas_str
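The returned SAS token is meant to be appended to a file-share URL as a query string, which is how pull_data (Example #5) and remove_data (Example #26) build their azcopy sources. A minimal sketch with a hypothetical cluster id:

    sas = get_storage_account_sas(account_name="abcdef123st")
    share_url = f"https://abcdef123st.file.core.windows.net/abcdef123-fs/train/dqn?{sas}"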
Example #23
    def setUpClass(cls) -> None:
        # Set Env.
        GlobalParams.LOG_LEVEL = logging.DEBUG

        # Init folders.
        os.makedirs(f"{GlobalPaths.ABS_MARO_TEST}/{cls.test_id}",
                    exist_ok=False)

        # Load config and save deployment.
        with open(file=cls.create_deployment_template_path, mode="r") as fr:
            create_deployment = yaml.safe_load(fr)
        with open(file=cls.test_config_path, mode="r") as fr:
            test_config = yaml.safe_load(fr)
            if test_config["cloud/subscription"] and test_config[
                    "cloud/default_public_key"]:
                create_deployment["name"] = f"test_maro_k8s_{cls.test_id}"
                create_deployment["cloud"]["subscription"] = test_config[
                    "cloud/subscription"]
                create_deployment["cloud"][
                    "resource_group"] = f"test_maro_k8s_{cls.test_id}"
                create_deployment["cloud"]["default_public_key"] = test_config[
                    "cloud/default_public_key"]
            else:
                raise Exception("Invalid config")
        with open(file=cls.create_deployment_path, mode="w") as fw:
            yaml.safe_dump(create_deployment, fw)

        # Get params from deployments.
        cls.resource_group = create_deployment["cloud"]["resource_group"]
        cls.cluster_name = create_deployment["name"]

        # Pull testing images.
        command = "docker pull alpine:latest"
        Subprocess.run(command=command)
        command = "docker pull ubuntu:latest"
        Subprocess.run(command=command)
Example #24
    def test21_train_dqn(self) -> None:
        # Copy dqn examples to test folder.
        dqn_source_dir = os.path.normpath(
            os.path.join(self.maro_pkg_path, "./examples/cim/dqn"))
        dqn_target_dir = os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{self.test_id}/train/dqn")
        os.makedirs(os.path.dirname(
            f"{GlobalPaths.ABS_MARO_TEST}/{self.test_id}/train/dqn"),
                    exist_ok=True)
        command = f"cp -r {dqn_source_dir} {GlobalPaths.ABS_MARO_TEST}/{self.test_id}/train/dqn"
        Subprocess.run(command=command)

        # Get cluster details and rebuild config
        cluster_details = self._get_cluster_details()
        with open(f"{dqn_target_dir}/config.yml", 'r') as fr:
            config = yaml.safe_load(fr)
        with open(f"{dqn_target_dir}/distributed_config.yml", "r") as fr:
            distributed_config = yaml.safe_load(fr)
        with open(f"{dqn_target_dir}/config.yml", "w") as fw:
            config["main_loop"]["max_episode"] = 25
            config["main_loop"]["exploration"]["split_ep"] = 20
            yaml.safe_dump(config, fw)
        with open(f"{dqn_target_dir}/distributed_config.yml", 'w') as fw:
            distributed_config["redis"]["hostname"] = cluster_details["redis"][
                "private_ip_address"]
            yaml.safe_dump(distributed_config, fw)

        # Push dqn folder to cluster
        command = (
            f"maro k8s data push {self.cluster_name} --debug "
            f"'{GlobalPaths.MARO_TEST}/{self.test_id}/train/dqn' '/train'")
        Subprocess.run(command=command)

        # Start job.
        start_job_dqn_template_path = os.path.normpath(
            os.path.join(self.test_dir_path,
                         "./modes/aks/k8s_aks_start_job_dqn.yml"))
        command = f"maro k8s job start {self.cluster_name} {start_job_dqn_template_path}"
        Subprocess.run(command=command)
        self._gracefully_wait(60)

        # Check job status.
        remain_idx = 0
        is_finished = False
        while remain_idx <= 100:
            name_to_job_details = self._get_name_to_job_details()
            job_details = name_to_job_details[self.job_name]
            if "succeeded" in job_details["status"] and job_details["status"][
                    "succeeded"] == 1:
                is_finished = True
                break
            time.sleep(10)
            remain_idx += 1
        self.assertTrue(is_finished)
Example #25
    def test_2_kubectl_exec_big_file_to_local(self) -> None:
        command = f"mkdir -p {GlobalPaths.MARO_TEST}/{self.test_id}/test_2_kubectl_exec_big_file_to_local"
        _ = Subprocess.run(command=command)

        basename = os.path.basename(
            f"/mnt/maro/{self.test_id}/test_1_kubectl_exec_big_file_to_remote")
        dirname = os.path.dirname(
            f"/mnt/maro/{self.test_id}/test_1_kubectl_exec_big_file_to_remote")
        command = (
            f"kubectl exec -i {self.pod_name} -- tar cf - -C {dirname} {basename}  | "
            f"tar xf - -C {GlobalPaths.MARO_TEST}/{self.test_id}/test_2_kubectl_exec_big_file_to_local"
        )
        Subprocess.interactive_run(command=command)
        self.assertTrue(
            os.path.exists(
                os.path.expanduser(
                    f"{GlobalPaths.MARO_TEST}/{self.test_id}/"
                    f"test_2_kubectl_exec_big_file_to_local/test_1_kubectl_exec_big_file_to_remote/big_file"
                )))
Example #26
    def remove_data(self, remote_path: str) -> None:
        """Remote data at the remote AFS service.

        Args:
            remote_path (str): path of the remote data.

        Returns:
            None.
        """
        # FIXME: Remove failed, The specified resource may be in use by an SMB client

        # Get sas
        sas = self._check_and_get_account_sas()

        # Remove data
        copy_command = (
            "azcopy remove "
            f"'https://{self.cluster_id}st.file.core.windows.net/{self.cluster_id}-fs{remote_path}?{sas}' "
            "--recursive=True")
        _ = Subprocess.run(command=copy_command)
Example #27
    def test_ssh_default_port_connection(node_username: str,
                                         node_hostname: str,
                                         node_ssh_port: int) -> None:
        """Test ssh connection.

        Args:
            node_username (str): username of the MARO Node VM.
            node_hostname (str): hostname of the MARO Node VM.
            node_ssh_port (int): ssh port of the MARO Node VM.

        Raises:
            CliError / TimeoutExpired: if the connection fails.

        Returns:
            None.
        """
        command = (
            f"ssh -o StrictHostKeyChecking=no -p {node_ssh_port} {node_username}@{node_hostname} "
            "echo 'Connection established'")
        _ = Subprocess.run(command=command, timeout=5)
Example #28
    def test_2_azcopy_small_files_to_local(self) -> None:
        sas = self.executor._check_and_get_account_sas()
        command = f"mkdir -p {GlobalPaths.MARO_TEST}/{self.test_id}/test_2_azcopy_small_files_to_local"
        _ = Subprocess.run(command=command)

        local_path = os.path.expanduser(
            f"{GlobalPaths.MARO_TEST}/{self.test_id}/test_2_azcopy_small_files_to_local"
        )
        command = (
            f"azcopy copy "
            f"'https://{self.cluster_id}st.file.core.windows.net/{self.cluster_id}-fs"
            f"/{self.test_id}/test_1_azcopy_small_files_to_remote?{sas}' "
            f"'{local_path}' "
            f"--recursive=True")
        Subprocess.interactive_run(command=command)
        self.assertTrue(
            os.path.exists(
                os.path.expanduser(
                    f"{GlobalPaths.MARO_TEST}/{self.test_id}/"
                    f"test_2_azcopy_small_files_to_local/test_1_azcopy_small_files_to_remote/small_files"
                )))
Example #29
    def remote_create_user(master_username: str, master_hostname: str,
                           master_ssh_port: int, user_id: str,
                           user_role: str) -> dict:
        """Remote create MARO User.

        Exec /lib/scripts/master/create_user.py remotely.

        Args:
            master_username (str): username of the MARO Master VM.
            master_hostname (str): hostname of the MARO Master VM.
            master_ssh_port (int): ssh port of the MARO Master VM.
            user_id (str): id of the MARO User.
            user_role (str): role of the MARO User; currently only 'admin' is supported.

        Returns:
            dict: details of the created MARO User.
        """
        command = (
            f"ssh -o StrictHostKeyChecking=no -p {master_ssh_port} {master_username}@{master_hostname} "
            f"'cd {GlobalPaths.MARO_SHARED}/lib/grass; python3 -m scripts.master.create_user "
            f"{user_id} {user_role}'")
        return_str = Subprocess.run(command=command)
        return json.loads(return_str)
Example #30
    def get_dynamic_info(interval: int = None) -> dict:
        """Get dynamic resource information about the local environment.

        Args:
            interval (int): sampling interval in seconds, passed through to psutil.cpu_percent().

        Returns:
            dict: CPU usage per core, memory usage and per-GPU utilization.
        """
        dynamic_info = {}
        dynamic_info["cpu_usage_per_core"] = psutil.cpu_percent(
            interval=interval, percpu=True)

        memory = psutil.virtual_memory()
        dynamic_info["memory_usage"] = memory.percent / 100

        gpu_dynamic_command = "nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits"
        dynamic_info["gpu_memory_usage"] = []
        try:
            return_str = Subprocess.run(command=gpu_dynamic_command)
            # Drop empty lines before converting the per-GPU readings to float.
            usage_per_gpu = [line for line in return_str.split("\n") if line.strip()]
            for single_usage in usage_per_gpu:
                dynamic_info["gpu_memory_usage"].append(float(single_usage))
        except Exception:
            # No GPU or nvidia-smi not available.
            pass

        return dynamic_info