Example #1
    def scontrol_update(self, job_core_num, sbatch_file_path, time_limit):
        """Prevent scontrol update locked exception.

        scontrol generates: Job update not available right now, the DB index is being
        set, try again in a bit for job 5.
        """
        try:
            _slurm_job_id = self.submit_slurm_job(job_core_num,
                                                  sbatch_file_path)
            slurm_job_id = _slurm_job_id.split()[3]
            cmd = [
                "scontrol", "update", f"jobid={slurm_job_id}",
                f"TimeLimit={time_limit}"
            ]
            subprocess_call(cmd, attempt=10, sleep_time=10)
            return slurm_job_id
        except Exception as e:
            print_tb(e)
            raise e
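
All of these examples funnel shell commands through the same subprocess_call(cmd, attempt, sleep_time) helper, whose retry semantics are only implied by the call sites. A minimal sketch of such a wrapper, assuming it returns the command's decoded stdout and re-raises after the last failed attempt:

import subprocess
import time


def subprocess_call(cmd, attempt=1, sleep_time=1.0):
    """Run `cmd`, retrying up to `attempt` times with `sleep_time` seconds
    between tries (a sketch; the real ebloc-broker helper may log and raise
    a project-specific exception instead)."""
    for count in range(attempt):
        try:
            return subprocess.check_output(cmd).decode("utf-8").strip()
        except subprocess.CalledProcessError:
            if count == attempt - 1:
                raise  # retries exhausted; propagate the last failure
            time.sleep(sleep_time)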
Example #2
    def upload(self, key, is_job_key):
        """Upload generated result into gdrive.

        :param key: key of the shared gdrive file
        :returns: True if upload is successful
        """
        try:
            if not is_job_key:
                meta_data = gdrive.get_data_key_ids(self.results_folder_prev)
                key = meta_data[key]

            cmd = [
                env.GDRIVE, "info", "--bytes", key, "-c", env.GDRIVE_METADATA
            ]
            gdrive_info = subprocess_call(cmd, 5, sleep_time=30)
        except Exception as e:
            raise Exception(
                f"{WHERE(1)} E: {key} does not have a match. meta_data={meta_data}. {e}"
            ) from e

        mime_type = gdrive.get_file_info(gdrive_info, "Mime")
        log(f"mime_type={mime_type}")
        self.data_transfer_out += calculate_size(self.patch_file)
        log(f"data_transfer_out={self.data_transfer_out} MB =>"
            f" rounded={int(self.data_transfer_out)} MB")
        if "folder" in mime_type:
            cmd = [
                env.GDRIVE, "upload", "--parent", key, self.patch_file, "-c",
                env.GDRIVE_METADATA
            ]
        elif "gzip" in mime_type or "/zip" in mime_type:
            cmd = [
                env.GDRIVE, "update", key, self.patch_file, "-c",
                env.GDRIVE_METADATA
            ]
        else:
            raise Exception("Files could not be uploaded")

        try:
            log(subprocess_call(cmd, 5))
        except Exception as e:
            print_tb(e)
            raise Exception("E: gdrive could not upload the file") from e
Example #3
    def pre_data_check(self, key):
        if self.data_transfer_in_to_download > self.data_transfer_in_requested:
            # TODO: full refund
            raise Exception(
                "Requested size to download the source_code and data files is greater than the given amount"
            )

        try:
            cmd = ["gdrive", "info", "--bytes", key, "-c", env.GDRIVE_METADATA]
            return subprocess_call(cmd, 1)
        except Exception as e:
            # TODO: gdrive list --query "sharedWithMe"
            print_tb(e)
            raise e
Example #4
File: ipfs.py Project: ebloc/ebloc-broker
    def _ipfs_stat(self, ipfs_hash, _is_ipfs_on=True):
        """Return stats of the give IPFS hash.

        This function *may* run for an indetermined time. Returns a dict with the
        size of the block with the given hash.
        """
        if _is_ipfs_on and not is_ipfs_on():
            raise IpfsNotConnected

        with cfg.console.status(
                f"$ ipfs object stat {ipfs_hash} --timeout={cfg.IPFS_TIMEOUT}s"
        ):
            return subprocess_call([
                "ipfs", "object", "stat", ipfs_hash,
                f"--timeout={cfg.IPFS_TIMEOUT}s"
            ])
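
subprocess_call returns the raw `ipfs object stat` output, which prints "Key: value" lines such as BlockSize and CumulativeSize. A hedged sketch of turning that text into the dict the docstring mentions (parse_ipfs_stat is a hypothetical helper, not part of the project):

def parse_ipfs_stat(output: str) -> dict:
    """Parse `ipfs object stat` output into {field: int} (hypothetical helper;
    assumes "Key: value" lines with integer values, e.g. CumulativeSize)."""
    stats = {}
    for line in output.splitlines():
        key, _, value = line.partition(":")
        if value.strip().isdigit():
            stats[key.strip()] = int(value.strip())

    return stats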
Example #5
    def gdrive_download_folder(self, name, key, source_code_hash, _id, cache_folder) -> bool:
        log(f"{WHERE(1)}")
        if self._is_cached(source_code_hash, _id):
            return True

        is_continue = False
        with suppress(Exception):
            output = self.job_infos[0]["storage_duration"][_id]
            is_continue = True

        if is_continue and not self.job_infos[0]["is_cached"][source_code_hash] and not output:
            log("## Downloaded as temporary data file", "bold yellow")
            self.folder_path_to_download[source_code_hash] = self.results_folder_prev
        else:
            self.folder_path_to_download[source_code_hash] = cache_folder
            # self.assign_folder_path_to_download(_id, source_code_hash, cache_folder)

        log(f"## downloading => {key}\nPath to download => {self.folder_path_to_download[source_code_hash]}")
        if self.folder_type_dict[source_code_hash] == "folder":
            folder = self.folder_path_to_download[source_code_hash]
            subprocess_call(
                ["gdrive", "download", "--recursive", key, "--force", "--path", folder],
                10,
            )

            downloaded_folder_path = f"{self.folder_path_to_download[source_code_hash]}/{name}"
            if not os.path.isdir(downloaded_folder_path):
                # check before move operation
                raise Exception(f"E: Folder ({downloaded_folder_path}) is not downloaded successfully")

            self.data_transfer_in_requested = calculate_size(downloaded_folder_path)
            logging.info(
                f"data_transfer_in_requested={self.data_transfer_in_requested} MB | "
                f"Rounded={int(self.data_transfer_in_requested)} MB"
            )
        else:
            folder = self.folder_path_to_download[source_code_hash]
            cmd = ["gdrive", "download", key, "--force", "--path", folder]
            subprocess_call(cmd, 10)

            file_path = f"{self.folder_path_to_download[source_code_hash]}/{name}"
            if not os.path.isfile(file_path):
                raise Exception(f"{WHERE(1)} E: File {file_path} is not downloaded successfully")

            filename = f"{self.folder_path_to_download[source_code_hash]}/{name}"
            p1 = subprocess.Popen(
                [
                    "ls",
                    "-ln",
                    filename,
                ],
                stdout=subprocess.PIPE,
            )
            p2 = subprocess.Popen(["awk", "{print $5}"], stdin=p1.stdout, stdout=subprocess.PIPE)
            p1.stdout.close()  # type: ignore
            # returns downloaded files size in bytes
            self.data_transfer_in_requested = byte_to_mb(p2.communicate()[0].decode("utf-8").strip())
            logging.info(
                f"data_transfer_in_requested={self.data_transfer_in_requested} MB |"
                f" Rounded={int(self.data_transfer_in_requested)} MB"
            )
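
The ls -ln | awk pipeline above only recovers the downloaded file's size in bytes before byte_to_mb converts it. Assuming byte_to_mb is a plain decimal bytes-to-megabytes conversion (an assumption; the project may use 1024-based MiB instead), os.path.getsize expresses the same thing without spawning subprocesses:

import os


def byte_to_mb(byte_size) -> float:
    """Convert a byte count (int or numeric string) to megabytes.
    Assumption: decimal MB (1 MB = 1,000,000 bytes), not MiB."""
    return int(byte_size) / (1000 * 1000)


# equivalent to: ls -ln <file> | awk '{print $5}'
size_mb = byte_to_mb(os.path.getsize(__file__))  # __file__ is a stand-in path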
Example #6
    def get_data(self, key, _id, is_job_key=False):
        try:
            mime_type, name = self.get_data_init(key, _id, is_job_key)
        except Exception as e:
            print_tb(e)
            raise e

        if is_job_key:
            gdrive_info = self.pre_data_check(key)
            name = gdrive.get_file_info(gdrive_info, "Name")
            mime_type = gdrive.get_file_info(gdrive_info, "Mime")

        # folder is already stored by its source_code_hash
        source_code_hash = name.replace(".tar.gz", "")
        log(f"==> name={name}")
        log(f"==> mime_type=[magenta]{mime_type}")
        if _id == 0:
            # source code folder, ignore downloading result-*
            name = f"{name}.tar.gz"
            try:
                output = gdrive.get_file_id(key)
            except Exception as e:
                print_tb(e)
                raise e

            key = echo_grep_awk(output, name, "1")
            mime_type = "gzip"

        if "gzip" in mime_type:
            try:
                cmd = ["gdrive", "info", "--bytes", key, "-c", env.GDRIVE_METADATA]
                gdrive_info = subprocess_call(cmd, 10)
            except Exception as e:
                print_tb(e)
                raise e

            source_code_hash = gdrive.get_file_info(gdrive_info, "Md5sum")
            self.md5sum_dict[key] = source_code_hash
            log(f"==> md5sum={self.md5sum_dict[key]}")

            # received job arrives as a tar.gz archive
            self.folder_type_dict[source_code_hash] = "gzip"
            try:
                self.cache(_id, name, source_code_hash, key, is_job_key)
            except Exception as e:
                print_tb(e)
                raise e

            if is_job_key:
                target = self.results_folder
            else:
                target = f"{self.results_data_folder}/{source_code_hash}"
                mkdir(target)

            try:
                cache_folder = self.folder_path_to_download[source_code_hash]
                untar(f"{cache_folder}/{name}", target)
            except Exception as e:
                print_tb(e)
                raise e

            self.remove_downloaded_file(source_code_hash, _id, f"{cache_folder}/{name}")
            return target
        elif "folder" in mime_type:
            #: received job is in folder format
            self.folder_type_dict[source_code_hash] = "folder"
            self.cache(_id, name, source_code_hash, key, is_job_key)

            cache_folder = self.folder_path_to_download[source_code_hash]
            cmd = [
                "rsync",
                "-avq",
                # --partial-dir requires a directory argument; a bare flag would
                # swallow the next option. ".rsync-partial" is a placeholder name.
                "--partial-dir=.rsync-partial",
                "--omit-dir-times",
                f"{cache_folder}/{name}/",
                self.results_folder,
            ]
            try:
                output = run(cmd)
            except Exception as e:
                print_tb(e)
                raise e

            self.remove_downloaded_file(source_code_hash, _id, f"{cache_folder}/{name}/")
            tar_file = f"{self.results_folder}/{name}.tar.gz"
            try:
                untar(tar_file, self.results_folder)
                _remove(tar_file)
                return self.results_folder
            except Exception as e:
                print_tb(e)
                raise e
        else:
            raise Exception("Neither folder or gzip type given.")
Example #7
def size(key, mime_type, folder_name, gdrive_info, results_folder_prev,
         code_hashes, is_cached):
    source_code_key = None
    size_to_download = 0
    if "folder" not in mime_type:
        raise Exception

    try:
        output = get_file_id(key)
        log(f"==> data_id=[magenta]{key}")
        log(output, "bold green")
        data_files_id = fetch_grive_output(output, "meta_data.json")
        # key for the source_code, eliminating the output*.tar.gz files
        source_code_key = fetch_grive_output(output, f"{folder_name}.tar.gz")
        if not data_files_id:
            raise Exception

        cmd = [
            "gdrive",
            "download",
            "--recursive",
            data_files_id,  # first id is meta_data
            "--force",
            "--path",
            results_folder_prev,
        ]
        output = subprocess_call(cmd, 10)
        print(output)
        cmd = [
            "gdrive",
            "info",
            "--bytes",
            source_code_key,
            "-c",
            env.GDRIVE_METADATA,
        ]
        gdrive_info = subprocess_call(cmd, 10)
    except Exception as e:
        print_tb(e)
        # TODO: gdrive list --query "sharedWithMe"
        raise e

    md5sum = get_file_info(gdrive_info, "Md5sum")
    _source_code_hash = code_hashes[0].decode("utf-8")
    if md5sum != _source_code_hash:
        # compare the md5sum obtained from gdrive with the one given by the user
        raise Exception(
            f"E: md5sum does not match the provided data {source_code_key}"
        )

    log(f"SUCCESS on folder={md5sum}", "bold green")
    byte_size = int(get_file_info(gdrive_info, "Size"))
    log(f"## code_hashes[0] == {_source_code_hash} | size={byte_size} bytes")
    if not is_cached[code_hashes[0].decode("utf-8")]:
        size_to_download += byte_size

    meta_data = get_data_key_ids(results_folder_prev)

    data_key_dict = {}
    if len(meta_data) > 1:
        idx = 0
        for k, v in meta_data.items():
            if idx == 0:  # first item is for the source-code itself
                _key = str(v)
                output = get_file_id(_key)
                data_key = fetch_grive_output(output, f"{k}.tar.gz")
                cmd = [
                    "gdrive", "info", "--bytes", data_key, "-c",
                    env.GDRIVE_METADATA
                ]
                gdrive_info = subprocess_call(cmd, 10)
                log(f" * gdrive_info for [green]{k}[/green]:")
                log(gdrive_info, "bold yellow")
                idx += 1
            else:  # remaining items are the registered data files
                _key = str(v)
                output = get_file_id(_key)
                data_key = fetch_grive_output(output, f"{k}.tar.gz")
                cmd = [
                    "gdrive", "info", "--bytes", data_key, "-c",
                    env.GDRIVE_METADATA
                ]
                gdrive_info = subprocess_call(cmd, 10)

                md5sum = get_file_info(gdrive_info, _type="Md5sum")
                log(f" * gdrive_info for [green]{k}[/green]:")
                log(gdrive_info, "bold yellow")
                given_code_hash = code_hashes[idx].decode("utf-8")
                log(f"==> given_code_hash={given_code_hash}  idx={idx}")
                if md5sum != given_code_hash:
                    # compare the md5sum obtained from gdrive with the one given by the user
                    raise Exception(
                        f"E: md5sum does not match the provided data{br(idx)}\n"
                        f"md5sum={md5sum} | given={given_code_hash}")

                data_key_dict[md5sum] = data_key
                _size = int(get_file_info(gdrive_info, "Size"))
                log(f"==> code_hashes{br(idx)} == {code_hashes[idx].decode('utf-8')} size={_size} bytes"
                    )
                byte_size += _size
                if not is_cached[code_hashes[idx].decode("utf-8")]:
                    size_to_download += _size

        if data_key_dict:
            data_link_file = f"{results_folder_prev}/meta_data_link.json"
            with open(data_link_file, "w") as f:
                json.dump(data_key_dict, f)
        else:
            raise Exception("E: Something is wrong. data_key_dict is empty")

    output = byte_to_mb(size_to_download)
    logging.info(
        f"Total_size={byte_size} bytes | size to download={size_to_download} bytes => {output} MB"
    )
    return output, data_key_dict, source_code_key
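
size() iterates meta_data.items() as a {folder_name: gdrive_key} mapping and later dumps data_key_dict to meta_data_link.json, so get_data_key_ids plausibly just loads meta_data.json from the download folder. A sketch under that assumption:

import json


def get_data_key_ids(results_folder_prev: str) -> dict:
    """Load the {name: gdrive_key} mapping from the downloaded meta_data.json
    (a sketch; the flat-object layout is inferred from the loop above)."""
    with open(f"{results_folder_prev}/meta_data.json") as f:
        return json.load(f)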