Example #1
import subprocess


def calculate_size(path, _type="MB") -> float:
    """Return the size of the given path in MB, or in bytes if requested."""
    # "du -sb" prints "<bytes>\t<path>"; awk keeps only the byte count
    p1 = subprocess.Popen(["du", "-sb", path], stdout=subprocess.PIPE)
    p2 = subprocess.Popen(["awk", "{print $1}"],
                          stdin=p1.stdout,
                          stdout=subprocess.PIPE)
    p1.stdout.close()  # type: ignore  # let p1 receive SIGPIPE if p2 exits first
    byte_size = float(p2.communicate()[0].decode("utf-8").strip())
    if _type == "bytes":
        return byte_size
    else:
        return byte_to_mb(byte_size)
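These examples rely on a byte_to_mb helper that is not shown. Below is a minimal sketch, assuming a plain binary bytes-to-megabytes division (the divisor the original repo uses is an assumption); the du | awk pipe can also be collapsed into a single check_output call, since split() can stand in for the awk stage:

import subprocess


def byte_to_mb(size_in_bytes) -> float:
    # assumed helper: convert a byte count to megabytes (binary divisor assumed)
    return float(size_in_bytes) / (1024 * 1024)


def calculate_size_simple(path: str) -> float:
    # "du -sb" prints "<bytes>\t<path>"; split()[0] replaces the awk stage
    byte_size = float(subprocess.check_output(["du", "-sb", path]).split()[0])
    return byte_to_mb(byte_size)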
Example #2
    def _ipfs_add_folder(self, folder_path):
        try:
            self.result_ipfs_hash = cfg.ipfs.add(folder_path)
            log(f"==> result_ipfs_hash={self.result_ipfs_hash}")
            cfg.ipfs.pin(self.result_ipfs_hash)
            data_transfer_out = cfg.ipfs.get_cumulative_size(
                self.result_ipfs_hash)
        except Exception as e:
            print_tb(e)
            raise

        data_transfer_out = byte_to_mb(data_transfer_out)
        self.data_transfer_out += data_transfer_out
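cfg.ipfs is a repo-specific wrapper whose implementation is not shown. As a rough stand-in, the calls used above map onto the ipfshttpclient package; this is only a sketch of the assumed interface, not the original code:

import ipfshttpclient


class IpfsWrapper:
    def __init__(self, addr="/ip4/127.0.0.1/tcp/5001/http"):
        self.client = ipfshttpclient.connect(addr)

    def add(self, folder_path: str) -> str:
        # a recursive add returns one entry per file plus the wrapping
        # directory; the last entry is the directory itself
        res = self.client.add(folder_path, recursive=True)
        return res[-1]["Hash"] if isinstance(res, list) else res["Hash"]

    def pin(self, ipfs_hash: str) -> None:
        self.client.pin.add(ipfs_hash)

    def get_cumulative_size(self, ipfs_hash: str) -> int:
        # CumulativeSize counts the whole DAG in bytes, links included
        return self.client.object.stat(ipfs_hash)["CumulativeSize"]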
Example #3
    def check_ipfs(self, ipfs_hash) -> None:
        """Append into self.ipfs_hashes."""
        try:
            ipfs_stat, cumulative_size = cfg.ipfs.is_hash_exists_online(
                ipfs_hash)
            if "CumulativeSize" not in ipfs_stat:
                raise Exception(
                    "E: Merkle hash not found! Timed out retrieving the IPFS object stat"
                )
        except Exception as e:
            print_tb(e)
            raise

        self.ipfs_hashes.append(ipfs_hash)
        self.cumulative_sizes[self.job_key] = cumulative_size
        data_size_mb = byte_to_mb(cumulative_size)
        logging.info(
            f"data_transfer_out={data_size_mb} MB | Rounded={int(data_size_mb)} MB"
        )
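is_hash_exists_online is likewise repo-specific. Continuing the hypothetical wrapper sketched under Example #2, it could be an object-stat call with a capped wait, assuming ipfshttpclient's per-call timeout keyword:

    def is_hash_exists_online(self, ipfs_hash: str, timeout: int = 300):
        # object.stat blocks while the DHT resolves the hash, so cap the
        # wait; a timeout surfaces as an exception for the caller
        ipfs_stat = self.client.object.stat(ipfs_hash, timeout=timeout)
        return ipfs_stat, ipfs_stat["CumulativeSize"]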
Example #4
    def gdrive_download_folder(self, name, key, source_code_hash, _id, cache_folder) -> bool:
        log(f"{WHERE(1)}")
        if self._is_cached(source_code_hash, _id):
            return True

        is_continue = False
        with suppress(Exception):
            # "output" stays unset when this lookup fails; is_continue guards its use below
            output = self.job_infos[0]["storage_duration"][_id]
            is_continue = True

        if is_continue and not self.job_infos[0]["is_cached"][source_code_hash] and not output:
            log("## Downloaded as temporary data file", "bold yellow")
            self.folder_path_to_download[source_code_hash] = self.results_folder_prev
        else:
            self.folder_path_to_download[source_code_hash] = cache_folder
            # self.assign_folder_path_to_download(_id, source_code_hash, cache_folder)

        log(f"## downloading => {key}\nPath to download => {self.folder_path_to_download[source_code_hash]}")
        if self.folder_type_dict[source_code_hash] == "folder":
            folder = self.folder_path_to_download[source_code_hash]
            subprocess_call(
                ["gdrive", "download", "--recursive", key, "--force", "--path", folder],
                10,
            )

            downloaded_folder_path = f"{self.folder_path_to_download[source_code_hash]}/{name}"
            if not os.path.isdir(downloaded_folder_path):
                # check before move operation
                raise Exception(f"E: Folder ({downloaded_folder_path}) is not downloaded successfully")

            self.data_transfer_in_requested = calculate_size(downloaded_folder_path)
            logging.info(
                f"data_transfer_in_requested={self.data_transfer_in_requested} MB | "
                f"Rounded={int(self.data_transfer_in_requested)} MB"
            )
        else:
            folder = self.folder_path_to_download[source_code_hash]
            cmd = ["gdrive", "download", key, "--force", "--path", folder]
            subprocess_call(cmd, 10)

            file_path = f"{self.folder_path_to_download[source_code_hash]}/{name}"
            if not os.path.isfile(file_path):
                raise Exception(f"{WHERE(1)} E: File {file_path} is not downloaded successfully")

            filename = f"{self.folder_path_to_download[source_code_hash]}/{name}"
            p1 = subprocess.Popen(
                [
                    "ls",
                    "-ln",
                    filename,
                ],
                stdout=subprocess.PIPE,
            )
            p2 = subprocess.Popen(["awk", "{print $5}"], stdin=p1.stdout, stdout=subprocess.PIPE)
            p1.stdout.close()  # type: ignore
            # returns downloaded files size in bytes
            self.data_transfer_in_requested = byte_to_mb(p2.communicate()[0].decode("utf-8").strip())
            logging.info(
                f"data_transfer_in_requested={self.data_transfer_in_requested} MB |"
                f" Rounded={int(self.data_transfer_in_requested)} MB"
            )
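In the single-file branch, the ls -ln | awk pipe can be avoided entirely: os.path.getsize returns the same byte count directly. A minimal sketch, reusing the assumed byte_to_mb helper from Example #1:

import os


def file_size_mb(file_path: str) -> float:
    # os.path.getsize returns the size in bytes, i.e. the fifth column of "ls -ln"
    return byte_to_mb(os.path.getsize(file_path))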
Example #5
import json
import logging


def size(key, mime_type, folder_name, gdrive_info, results_folder_prev,
         code_hashes, is_cached):
    """Return the download size in MB, the data-key dict, and the source-code key."""
    source_code_key = None
    size_to_download = 0
    if "folder" not in mime_type:
        raise Exception

    try:
        output = get_file_id(key)
        log(f"==> data_id=[magenta]{key}")
        log(output, "bold green")
        data_files_id = fetch_grive_output(output, "meta_data.json")
        # key for the source_code tarball; output*.tar.gz files are eliminated
        source_code_key = fetch_grive_output(output, f"{folder_name}.tar.gz")
        if not data_files_id:
            raise Exception("E: meta_data.json is not found in the gdrive output")

        cmd = [
            "gdrive",
            "download",
            "--recursive",
            data_files_id,  # first id is meta_data
            "--force",
            "--path",
            results_folder_prev,
        ]
        output = subprocess_call(cmd, 10)
        print(output)
        cmd = [
            "gdrive",
            "info",
            "--bytes",
            source_code_key,
            "-c",
            env.GDRIVE_METADATA,
        ]
        gdrive_info = subprocess_call(cmd, 10)
    except Exception as e:
        print_tb(e)
        # TODO: gdrive list --query "sharedWithMe"
        raise

    md5sum = get_file_info(gdrive_info, "Md5sum")
    _source_code_hash = code_hashes[0].decode("utf-8")
    if md5sum != _source_code_hash:
        # the md5sum reported by gdrive must match the hash provided by the user
        raise Exception(
            f"E: md5sum does not match with the provided data {source_code_key}"
        )

    log(f"SUCCESS on folder={md5sum}", "bold green")
    byte_size = int(get_file_info(gdrive_info, "Size"))
    log(f"## code_hashes[0] == {_source_code_hash} | size={byte_size} bytes")
    if not is_cached[_source_code_hash]:
        size_to_download += byte_size

    meta_data = get_data_key_ids(results_folder_prev)

    data_key_dict = {}
    if len(meta_data) > 1:
        for idx, (k, v) in enumerate(meta_data.items()):
            if idx == 0:  # the first item belongs to the source code itself
                _key = str(v)
                output = get_file_id(_key)
                data_key = fetch_grive_output(output, f"{k}.tar.gz")
                cmd = [
                    "gdrive", "info", "--bytes", data_key, "-c",
                    env.GDRIVE_METADATA
                ]
                gdrive_info = subprocess_call(cmd, 10)
                log(f" * gdrive_info for [green]{k}[/green]:")
                log(gdrive_info, "bold yellow")
            else:  # the remaining items are the registered data files
                _key = str(v)
                output = get_file_id(_key)
                data_key = fetch_grive_output(output, f"{k}.tar.gz")
                cmd = [
                    "gdrive", "info", "--bytes", data_key, "-c",
                    env.GDRIVE_METADATA
                ]
                gdrive_info = subprocess_call(cmd, 10)

                md5sum = get_file_info(gdrive_info, _type="Md5sum")
                log(f" * gdrive_info for [green]{k}[/green]:")
                log(gdrive_info, "bold yellow")
                given_code_hash = code_hashes[idx].decode("utf-8")
                log(f"==> given_code_hash={given_code_hash}  idx={idx}")
                if md5sum != given_code_hash:
                    # the md5sum reported by gdrive must match the hash provided by the user
                    raise Exception(
                        f"E: md5sum does not match with the provided data{br(idx)}\n"
                        f"md5sum={md5sum} | given={given_code_hash}")

                data_key_dict[md5sum] = data_key
                _size = int(get_file_info(gdrive_info, "Size"))
                log(f"==> code_hashes{br(idx)} == {code_hashes[idx].decode('utf-8')} size={_size} bytes"
                    )
                byte_size += _size
                if not is_cached[code_hashes[idx].decode("utf-8")]:
                    size_to_download += _size

        if data_key_dict:
            data_link_file = f"{results_folder_prev}/meta_data_link.json"
            with open(data_link_file, "w") as f:
                json.dump(data_key_dict, f)
        else:
            raise Exception("E: Something is wrong. data_key_dict is empty")

    size_to_download_mb = byte_to_mb(size_to_download)
    logging.info(
        f"Total_size={byte_size} bytes | size to download={size_to_download} bytes => {size_to_download_mb} MB"
    )
    return size_to_download_mb, data_key_dict, source_code_key
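subprocess_call(cmd, 10) appears throughout these examples, but the helper itself is not shown; the second argument looks like a retry count. A hypothetical stand-in under that assumption:

import subprocess
import time


def subprocess_call(cmd, attempt=1, sleep_time=10):
    # hypothetical stand-in: run cmd, retrying up to `attempt` times,
    # and return its decoded stdout on success
    for count in range(attempt):
        try:
            return subprocess.check_output(cmd).decode("utf-8").strip()
        except subprocess.CalledProcessError:
            if count == attempt - 1:
                raise
            time.sleep(sleep_time)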