def _upload(dir_path, tar_hash, is_folder=False):
    if is_folder:
        subprocess.run(["gdrive", "upload", "--recursive", f"{dir_path}/{tar_hash}"], check=True)
        output = (
            subprocess.check_output(
                ["gdrive", "list", "--query", f"name='{tar_hash}' and trashed=false", "--no-header"]
            )
            .decode("utf-8")
            .strip()
        )
    else:
        file_name_to_upload = f"{tar_hash}.tar.gz"
        tar_file_path = f"{dir_path}/{file_name_to_upload}"
        subprocess.run(["gdrive", "upload", tar_file_path], check=True)
        _remove(tar_file_path)
        output = (
            subprocess.check_output(
                ["gdrive", "list", "--query", f"name='{file_name_to_upload}' and trashed=false", "--no-header"]
            )
            .decode("utf-8")
            .strip()
        )

    # the first column of `gdrive list` output is the file ID
    return output.split(" ", maxsplit=1)[0]

def upload(folder_to_share, tmp_dir, job_key_flag=False):
    tar_hashes = {}
    is_already_uploaded = False
    log(f"==> job_key_flag={job_key_flag}, tar.gz file is inside the base folder")
    dir_path = os.path.dirname(folder_to_share)
    tar_hash, _ = compress_folder(folder_to_share, is_exclude_git=True)
    tar_hashes[folder_to_share] = tar_hash
    path_to_move = f"{dir_path}/{tar_hash}"
    _from = f"{dir_path}/{tar_hash}.tar.gz"
    _to = f"{path_to_move}/{tar_hash}.tar.gz"
    mkdir(path_to_move)
    shutil.move(_from, _to)
    if job_key_flag:
        shutil.copyfile(f"{tmp_dir}/meta_data.json", f"{path_to_move}/meta_data.json")

    is_file_exist = _list(tar_hash, is_folder=True)
    if not is_file_exist:
        key = _upload(dir_path, tar_hash, is_folder=True)
        log(f"{_list(tar_hash)}", "bold green")
    else:
        log(f"## requested folder {tar_hash} is already uploaded", "bold blue")
        log(is_file_exist, "bold green")
        key = is_file_exist.partition("\n")[0].split()[0]
        is_already_uploaded = True

    _remove(f"{dir_path}/{tar_hash}")  # created .tar.gz file is removed
    return key, is_already_uploaded, tar_hash, tar_hashes

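# Usage sketch for the gdrive upload helpers above (hypothetical paths; assumes
# the `gdrive` CLI is installed and authorized, and that `compress_folder`,
# `_list`, `mkdir`, and `_remove` come from this package):
#
#   key, is_already_uploaded, tar_hash, tar_hashes = upload(
#       "/home/user/jobs/my_job",  # folder_to_share (hypothetical)
#       "/tmp/ebb",                # tmp_dir holding meta_data.json
#       job_key_flag=True,
#   )
#   # `key` is the first column of `gdrive list` output, i.e. the file ID of
#   # the uploaded folder; `tar_hashes` maps the shared folder to its md5 hash.
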
def main():
    coll = cl["eBlocBroker"]["cache"]
    block_number = Ebb.get_block_number()
    storageID = None
    cursor = coll.find({})
    for document in cursor:
        received_block_number, storage_duration = Ebb.get_job_storage_duration(
            env.PROVIDER_ID, document["sourceCodeHash"]
        )
        end_block_time = received_block_number + storage_duration * cfg.BLOCK_DURATION_1_HOUR
        storageID = document["storageID"]
        if end_block_time < block_number and received_block_number != 0:
            if storageID in (StorageID.IPFS, StorageID.IPFS_GPG):
                ipfsHash = document["jobKey"]
                print(run(["ipfs", "pin", "rm", ipfsHash]))
                print(run(["ipfs", "repo", "gc"]))
            else:
                cached_file_name = (
                    env.PROGRAM_PATH / document["requesterID"] / "cache" / f"{document['sourceCodeHash']}.tar.gz"
                )
                print(cached_file_name)
                _remove(cached_file_name)
                cached_file_name = env.PROGRAM_PATH / "cache" / f"{document['sourceCodeHash']}.tar.gz"
                print(cached_file_name)
                _remove(cached_file_name)

            print(received_block_number)
            # use document["jobKey"] directly; `ipfsHash` is only bound in the IPFS branch
            coll.delete_one({"jobKey": document["jobKey"]})

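# Pruning-condition sketch: a cached entry expires once the chain has advanced
# past `received_block_number + storage_duration * cfg.BLOCK_DURATION_1_HOUR`.
# For example, assuming a hypothetical 240 blocks per hour (15 s block time):
#
#   received_block_number = 1_000_000
#   storage_duration = 24  # hours
#   end_block_time = 1_000_000 + 24 * 240  # => 1_005_760
#   # the entry is deleted only when block_number exceeds 1_005_760
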
def upload(self, *_):
    """Upload files right after all patching is completed."""
    try:
        cfg.ipfs.gpg_encrypt(self.requester_gpg_fingerprint, self.patch_file)
    except Exception as e:
        _remove(self.patch_file)
        raise e

def eudat_download_folder(self, results_folder_prev, folder_name):
    """Download the corresponding folder from EUDAT.

    Always assumes the job is sent as a .tar.gz file.
    """
    # TODO: check whether the hash of the downloaded file is correct
    cached_tar_file = f"{results_folder_prev}/{folder_name}.tar.gz"
    log("#> downloading [green]output.zip[/green] for:", end="")
    log(f"{folder_name} => {cached_tar_file} ", "bold")
    key = folder_name
    share_key = f"{folder_name}_{self.requester_id[:16]}"
    for attempt in range(1):
        try:
            log("## Trying [blue]wget[/blue] approach...")
            token = self.share_id[share_key]["share_token"]
            if token:
                download_fn = f"{cached_tar_file.replace('.tar.gz', '')}_{self.requester_id}.download"
                cmd = [
                    "wget",
                    "-O",
                    download_fn,
                    "-c",
                    f"https://b2drop.eudat.eu/s/{token}/download",
                    "-q",
                    "--show-progress",
                    "--progress=bar:force",
                ]
                log(" ".join(cmd), is_code=True, color="yellow")
                run(cmd)
                with cd(results_folder_prev):
                    run(["unzip", "-o", "-j", download_fn])
                    _remove(download_fn)

                self.tar_downloaded_path[folder_name] = cached_tar_file
                log(f"## download file from eudat {ok()}")
                return
        except Exception:
            log("E: Failed to download the eudat file via wget.\nTrying the config.oc.get_file() approach...")
            if config.oc.get_file(f"/{key}/{folder_name}.tar.gz", cached_tar_file):
                self.tar_downloaded_path[folder_name] = cached_tar_file
                log(ok())
                return
            else:
                logging.error(f"E: Something is wrong; oc could not retrieve the file [attempt:{attempt}]")
                raise Exception("Eudat download error")

def remove_files(filename) -> bool:
    """Remove given file path; glob patterns are accepted."""
    if "*" in filename:
        for f in glob.glob(filename):
            try:
                _remove(f)
            except Exception as e:
                print_tb(str(e))
                return False
    else:
        try:
            _remove(filename)
        except Exception as e:
            print_tb(str(e))
            return False

    return True

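# Usage sketch (hypothetical paths): `remove_files` accepts either a single
# path or a glob pattern, returning False on the first failed removal:
#
#   remove_files("/tmp/run/output.log")    # removes one file
#   remove_files("/tmp/run/slurm-*.out")   # expands via glob.glob, removes each match
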
def remove_user(user_name, user_dir=None):
    """Remove user from Slurm.

    Manual equivalent (for test purposes):
        sudo userdel $USERNAME
        sudo rm -rf $BASEDIR/$USERNAME
        sacctmgr remove user where user=$USERNAME --immediate
    """
    run(["sudo", "userdel", "--force", user_name])
    cmd = ["sacctmgr", "remove", "user", "where", f"user={user_name}", "--immediate"]
    p, output, *_ = popen_communicate(cmd)
    if p.returncode != 0 and "Nothing deleted" not in output:
        raise Exception(f"E: sacctmgr remove error: {output}")

    if user_dir:
        _remove(user_dir)

def decrypt_using_gpg(self, gpg_file, extract_target=None):
    """Decrypt the compressed file using gpg.

    This function is specific to driver.ipfs: it decrypts the tar file and
    works only with ".tar.gz" file types.

    cmd:
        gpg --verbose --output={tar_file} --pinentry-mode loopback \
            --passphrase-file=f"{env.LOG_PATH}/gpg_pass.txt" \
            --decrypt {gpg_file_link}
    """
    if not os.path.isfile(f"{gpg_file}.gpg"):
        os.symlink(gpg_file, f"{gpg_file}.gpg")

    gpg_file_link = f"{gpg_file}.gpg"
    tar_file = f"{gpg_file}.tar.gz"
    cmd = [
        "gpg",
        "--verbose",
        "--batch",
        "--yes",
        f"--output={tar_file}",
        "--pinentry-mode",
        "loopback",
        f"--passphrase-file={env.GPG_PASS_FILE}",
        "--decrypt",
        gpg_file_link,
    ]
    try:
        run(cmd)
        log(f"==> GPG decrypt {ok()}")
        _remove(gpg_file)
        os.unlink(gpg_file_link)
    except Exception as e:
        print_tb(e)
        raise e

    if extract_target:
        try:
            untar(tar_file, extract_target)
        except Exception as e:
            raise Exception("E: Could not extract the given tar file") from e
        finally:
            _remove(f"{extract_target}/.git")
            _remove(tar_file)

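# Usage sketch (hypothetical paths; assumes env.GPG_PASS_FILE points at the
# provider's passphrase file): decrypt an IPFS_GPG payload and unpack it:
#
#   cfg.ipfs.decrypt_using_gpg(
#       gpg_file="/var/ebloc-broker/QmHash",     # encrypted payload (hypothetical)
#       extract_target="/var/ebloc-broker/run",  # resulting tar.gz is untarred here
#   )
#   # a `QmHash.gpg` symlink is created for gpg, then both the symlink and the
#   # intermediate tar.gz are cleaned up.
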
def get_data(self, key, _id, is_job_key=False):
    try:
        mime_type, name = self.get_data_init(key, _id, is_job_key)
    except Exception as e:
        print_tb(e)
        raise e

    if is_job_key:
        gdrive_info = self.pre_data_check(key)
        name = gdrive.get_file_info(gdrive_info, "Name")
        mime_type = gdrive.get_file_info(gdrive_info, "Mime")

    # the folder is already stored by its source_code_hash
    source_code_hash = name.replace(".tar.gz", "")
    log(f"==> name={name}")
    log(f"==> mime_type=[magenta]{mime_type}")
    if _id == 0:  # source code folder; ignore downloading result-*
        name = f"{name}.tar.gz"
        try:
            output = gdrive.get_file_id(key)
        except Exception as e:
            print_tb(e)
            raise e

        key = echo_grep_awk(output, name, "1")
        mime_type = "gzip"

    if "gzip" in mime_type:
        try:
            cmd = ["gdrive", "info", "--bytes", key, "-c", env.GDRIVE_METADATA]
            gdrive_info = subprocess_call(cmd, 10)
        except Exception as e:
            print_tb(e)
            raise e

        source_code_hash = gdrive.get_file_info(gdrive_info, "Md5sum")
        self.md5sum_dict[key] = source_code_hash
        log(f"==> md5sum={self.md5sum_dict[key]}")
        # received job is a folder within a tar.gz archive
        self.folder_type_dict[source_code_hash] = "gzip"
        try:
            self.cache(_id, name, source_code_hash, key, is_job_key)
        except Exception as e:
            print_tb(e)
            raise e

        if is_job_key:
            target = self.results_folder
        else:
            target = f"{self.results_data_folder}/{source_code_hash}"
            mkdir(target)

        try:
            cache_folder = self.folder_path_to_download[source_code_hash]
            untar(f"{cache_folder}/{name}", target)
            # cleanup must happen before returning, else it is unreachable
            self.remove_downloaded_file(source_code_hash, _id, f"{cache_folder}/{name}")
            return target
        except Exception as e:
            print_tb(e)
            raise e
    elif "folder" in mime_type:  #: received job is in folder format
        self.folder_type_dict[source_code_hash] = "folder"
        try:
            self.cache(_id, name, source_code_hash, key, is_job_key)
        except Exception as e:
            raise e

        cache_folder = self.folder_path_to_download[source_code_hash]
        cmd = [
            "rsync",
            "-avq",
            "--partial-dir",
            "--omit-dir-times",
            f"{cache_folder}/{name}/",
            self.results_folder,
        ]
        try:
            output = run(cmd)
        except Exception as e:
            print_tb(e)
            raise e

        self.remove_downloaded_file(source_code_hash, _id, f"{cache_folder}/{name}/")
        tar_file = f"{self.results_folder}/{name}.tar.gz"
        try:
            untar(tar_file, self.results_folder)
            _remove(tar_file)
            return self.results_folder
        except Exception as e:
            print_tb(e)
            raise e
    else:
        raise Exception("Neither folder nor gzip type is given.")

def remove_downloaded_file(self, source_code_hash, _id, pathname):
    if not self.job_infos[0]["is_cached"][source_code_hash] and self.job_infos[0]["storage_duration"][_id]:
        _remove(pathname)

def cache(self, _id, name, source_code_hash, key, is_job_key) -> None:
    if _id == 0:
        if self.folder_type_dict[source_code_hash] == "folder":
            self.folder_type_dict[source_code_hash] = "gzip"

    self.check_already_cached(source_code_hash)
    if self.cache_type[_id] == CacheType.PRIVATE:
        # first check whether the file already exists under the private cache directory
        cache_folder = self.private_dir
        cached_tar_file = cache_folder / name
        if self.folder_type_dict[source_code_hash] == "gzip":
            if os.path.isfile(cached_tar_file):
                self.job_infos[0]["is_cached"][source_code_hash] = True
                self.assign_folder_path_to_download(_id, source_code_hash, cached_tar_file)
                output = generate_md5sum(cached_tar_file)
                if output != self.md5sum_dict[key]:
                    raise Exception("File's md5sum does not match its original md5sum value")

                if output == source_code_hash:
                    # the already-downloaded file's hash matches the given hash
                    log(f"==> {name} is already cached within the private cache directory")
                    self.cache_type[_id] = CacheType.PRIVATE
                    return
            else:
                self.gdrive_download_folder(name, key, source_code_hash, _id, cache_folder)
        elif self.folder_type_dict[source_code_hash] == "folder":
            output = ""
            if os.path.isfile(cached_tar_file):
                self.job_infos[0]["is_cached"][source_code_hash] = True
                self.assign_folder_path_to_download(_id, source_code_hash, cache_folder)
                output = generate_md5sum(cached_tar_file)
            elif os.path.isdir(cache_folder):
                self.job_infos[0]["is_cached"][source_code_hash] = True
                self.folder_path_to_download[source_code_hash] = cache_folder
                output = generate_md5sum(cache_folder)

            if output == source_code_hash:
                # the already-downloaded folder's hash matches the given hash
                log(f"==> {name} is already cached within the private cache directory")
                self.cache_type[_id] = CacheType.PRIVATE
                return
            else:
                self.gdrive_download_folder(name, key, source_code_hash, _id, cache_folder)
    elif self.cache_type[_id] == CacheType.PUBLIC:
        cache_folder = self.public_dir
        cached_tar_file = cache_folder / name
        if self.folder_type_dict[source_code_hash] == "gzip":
            if not os.path.isfile(cached_tar_file):
                self.gdrive_download_folder(name, key, source_code_hash, _id, cache_folder)
                if is_job_key and not self.is_run_exists_in_tar(cached_tar_file):
                    _remove(cached_tar_file)
                    raise Exception("run.sh does not exist in the downloaded tar file")
            else:
                output = generate_md5sum(cached_tar_file)
                if output == source_code_hash:
                    # the already-downloaded file's hash matches the given hash
                    self.folder_path_to_download[source_code_hash] = self.public_dir
                    log(f"==> {name} is already cached within the public cache directory")
                else:
                    self.gdrive_download_folder(name, key, source_code_hash, _id, cache_folder)
        elif self.folder_type_dict[source_code_hash] == "folder":
            tar_file = cache_folder / source_code_hash / name
            if os.path.isfile(tar_file):
                output = generate_md5sum(tar_file)
                if output == source_code_hash:
                    # the already-downloaded folder's hash matches the given hash
                    self.folder_path_to_download[source_code_hash] = self.public_dir
                    log(f"==> {name} is already cached within the public cache directory")
                else:
                    self.gdrive_download_folder(name, key, source_code_hash, _id, f"{self.public_dir}/{name}")
            else:
                self.gdrive_download_folder(name, key, source_code_hash, _id, f"{self.public_dir}/{name}")

def clean_before_submit(self):
    for folder in self.folders_to_share:
        if isinstance(folder, str):
            _remove(os.path.join(folder, ".mypy_cache"))

def gpg_encrypt(self, user_gpg_finderprint, target):
    is_delete = False
    if os.path.isdir(target):
        try:
            *_, encrypt_target = compress_folder(target)
            encrypted_file_target = f"{encrypt_target}.gpg"
            is_delete = True
        except Exception as e:
            print_tb(e)
            sys.exit(1)
    else:
        if not os.path.isfile(target):
            logging.error(f"{target} does not exist")
            sys.exit(1)
        else:
            encrypt_target = target
            encrypted_file_target = f"{target}.gpg"
            is_delete = True

    if os.path.isfile(encrypted_file_target):
        log(f"## gpg_file: {encrypted_file_target} is already created")
        return encrypted_file_target

    for attempt in range(5):
        try:
            cmd = ["gpg", "--keyserver", "hkps://keyserver.ubuntu.com", "--recv-key", user_gpg_finderprint]
            log(f"{br(attempt)} cmd: [magenta]{' '.join(cmd)}", "bold")
            run(cmd)  # this may fail if it is requested too often in a short time
            break
        except Exception as e:
            log(f"warning: {e}")
            time.sleep(30)

    try:
        cmd = [
            "gpg",
            "--batch",
            "--yes",
            "--recipient",
            user_gpg_finderprint,
            "--trust-model",
            "always",
            "--output",
            encrypted_file_target,
            "--encrypt",
            encrypt_target,
        ]
        run(cmd)
        log(f"==> gpg_file=[magenta]{encrypted_file_target}")
        return encrypted_file_target
    except Exception as e:
        print_tb(e)
        if "encryption failed: Unusable public key" in str(e):
            log("#> Check solution: https://stackoverflow.com/a/34132924/2402577")
    finally:
        if is_delete:
            _remove(encrypt_target)

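# Usage sketch (hypothetical fingerprint and path): encrypt a folder for the
# provider before pushing it to IPFS; the counterpart is decrypt_using_gpg:
#
#   gpg_file = cfg.ipfs.gpg_encrypt(
#       "3FB2F8A10EAFAFB0...",           # provider's GPG fingerprint (hypothetical)
#       Path("/home/user/jobs/my_job"),  # folder or single file to encrypt
#   )
#   # a folder target is first compressed via compress_folder(); the returned
#   # path ends with ".gpg" and the intermediate tar.gz is removed afterwards.
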
def remove_lock_files(self):
    _remove(f"{env.HOME}/.ipfs/repo.lock")
    _remove(f"{env.HOME}/.ipfs/datastore/LOCK")

def run(self) -> bool:
    self.start_time = time.time()
    if cfg.IS_THREADING_ENABLED:
        self.thread_log_setup()

    run_ipfs_daemon()
    log(f"{br(get_date())} Job's source code has been sent through ", "bold cyan", end="")
    if self.cloudStorageID[0] == StorageID.IPFS:
        log("[bold green]IPFS")
    else:
        log("[bold green]IPFS_GPG")

    if not is_ipfs_on():
        return False

    log(f"==> is_hash_locally_cached={cfg.ipfs.is_hash_locally_cached(self.job_key)}")
    if not os.path.isdir(self.results_folder):
        os.makedirs(self.results_folder)

    _remove(f"{self.results_folder}/{self.job_key}")
    try:
        self.check_ipfs(self.job_key)
    except Exception:
        return False

    self.registered_data_hashes = []
    for idx, source_code_hash in enumerate(self.code_hashes):
        if self.cloudStorageID[idx] == StorageID.NONE:
            self.registered_data_hashes.append(source_code_hash)  # GOTCHA
        else:
            ipfs_hash = bytes32_to_ipfs(source_code_hash)
            if ipfs_hash not in self.ipfs_hashes:
                try:
                    # job_key as data hash may already have been added to the list
                    self.check_ipfs(ipfs_hash)
                except Exception:
                    return False

    initial_folder_size = calculate_size(self.results_folder)
    for idx, ipfs_hash in enumerate(self.ipfs_hashes):
        # at this point the script knows that the provided IPFS hashes exist online
        is_hashed = False
        log(f"## attempting to get IPFS file: {ipfs_hash} ... ", end="")
        if cfg.ipfs.is_hash_locally_cached(ipfs_hash):
            is_hashed = True
            log(ok("already cached"))
        else:
            log()

        if idx == 0:
            target = self.results_folder
        else:
            # "_" is prepended to the filename, needed for: $ ipfs get <ipfs_hash>
            target = f"{self.results_data_folder}/_{ipfs_hash}"
            mkdir(target)

        is_storage_paid = False  # TODO: should be set beforehand by user input
        cfg.ipfs.get(ipfs_hash, target, is_storage_paid)
        if idx > 0:
            # https://stackoverflow.com/a/31814223/2402577
            dst_filename = os.path.join(self.results_data_folder, os.path.basename(ipfs_hash))
            if os.path.exists(dst_filename):
                _remove(dst_filename)

            shutil.move(target, dst_filename)
            target = dst_filename

        if self.cloudStorageID[idx] == StorageID.IPFS_GPG:
            cfg.ipfs.decrypt_using_gpg(f"{target}/{ipfs_hash}", target)

        try:
            _git.initialize_check(target)
        except Exception as e:
            raise e

        if not is_hashed:
            folder_size = calculate_size(self.results_folder)
            self.data_transfer_in_to_download_mb += folder_size - initial_folder_size
            initial_folder_size = folder_size

        if idx == 0 and not self.check_run_sh():
            self.complete_refund()
            return False

    log(
        f"==> data_transfer_in={self.data_transfer_in_to_download_mb} MB | "
        f"rounded={int(self.data_transfer_in_to_download_mb)} MB"
    )
    return self.sbatch_call()

def cache(self, folder_name, _id) -> bool:
    success = self._is_cached(folder_name, _id)
    cached_folder = Path("")
    if self.cache_type[_id] == CacheType.PRIVATE:
        # download into the private directory at $HOME/.ebloc-broker/cache
        cached_folder = self.private_dir
    elif self.cache_type[_id] == CacheType.PUBLIC:
        cached_folder = self.public_dir

    cached_tar_file = cached_folder / f"{folder_name}.tar.gz"
    if success:
        self.folder_type_dict[folder_name] = "tar.gz"
        self.tar_downloaded_path[folder_name] = cached_tar_file
        return True

    if not os.path.isfile(cached_tar_file):
        if os.path.isfile(cached_folder / f"{folder_name}.tar.gz"):
            tar_hash = generate_md5sum(f"{cached_folder}/{folder_name}.tar.gz")
            if tar_hash == folder_name:
                # the already-downloaded folder's hash matches the given hash
                self.folder_type_dict[folder_name] = "folder"
                log(f"==> {folder_name} is already cached under the public directory", "bold blue")
                return True

            self.folder_type_dict[folder_name] = "tar.gz"
            try:
                self.eudat_download_folder(cached_folder, folder_name)
            except Exception as e:
                print_tb(e)
                self.complete_refund()
                return False
        else:
            self.folder_type_dict[folder_name] = "tar.gz"
            try:
                self.eudat_download_folder(cached_folder, folder_name)
            except Exception as e:
                print_tb(e)
                self.complete_refund()
                return False

        if (
            _id == 0
            and self.folder_type_dict[folder_name] == "tar.gz"
            and not self.is_run_exists_in_tar(cached_tar_file)
        ):
            _remove(cached_tar_file)
            return False
    else:
        # here we already know that it is a tar.gz file
        self.folder_type_dict[folder_name] = "tar.gz"
        output = generate_md5sum(cached_tar_file)
        if output == folder_name:
            # the already-downloaded file's hash matches the given hash
            log(f"==> {cached_tar_file} is already cached")
            self.tar_downloaded_path[folder_name] = cached_tar_file
            self.folder_type_dict[folder_name] = "tar.gz"
            return True

        try:
            self.eudat_download_folder(cached_folder, folder_name)
        except Exception as e:
            print_tb(e)
            self.complete_refund()
            return False

    return True

def submit_ipfs(job: Job, is_pass=False, required_confs=1):
    Ebb = cfg.Ebb
    requester = Ebb.w3.toChecksumAddress(job.requester_addr)
    provider = Ebb.w3.toChecksumAddress(job.provider_addr)
    pre_check(job, requester)
    log("==> Attempting to submit a job")
    main_storage_id = job.storage_ids[0]
    job.folders_to_share = job.paths
    check_link_folders(job.data_paths, job.registered_data_files, is_pass=is_pass)
    if main_storage_id == StorageID.IPFS:
        log("==> Submitting source code through [blue]IPFS[/blue]")
    elif main_storage_id == StorageID.IPFS_GPG:
        log("==> Submitting source code through [blue]IPFS_GPG[/blue]")
    else:
        log("E: Please provide IPFS or IPFS_GPG storage type for the source code")
        sys.exit(1)

    targets = []
    try:
        provider_info = Ebb.get_provider_info(provider)
    except Exception as e:
        print_tb(e)
        sys.exit(1)

    for idx, folder in enumerate(job.folders_to_share):
        if isinstance(folder, Path):
            target = folder
            if job.storage_ids[idx] == StorageID.IPFS_GPG:
                provider_gpg_finderprint = provider_info["gpg_fingerprint"]
                if not provider_gpg_finderprint:
                    log("E: Provider did not register any GPG fingerprint")
                    sys.exit(1)

                log(f"==> provider_gpg_finderprint={provider_gpg_finderprint}")
                try:
                    # target is updated to the encrypted file's path
                    target = cfg.ipfs.gpg_encrypt(provider_gpg_finderprint, target)
                    log(f"==> gpg_file={target}")
                except Exception as e:
                    print_tb(e)
                    sys.exit(1)

            try:
                ipfs_hash = cfg.ipfs.add(target)
                # ipfs_hash = ipfs.add(folder, True)  # True includes .git/
                run(["ipfs", "refs", ipfs_hash])
            except Exception as e:
                print_tb(e)
                sys.exit(1)

            if idx == 0:
                key = ipfs_hash

            job.code_hashes.append(ipfs_to_bytes32(ipfs_hash))
            job.code_hashes_str.append(ipfs_hash)
            log(f"==> ipfs_hash={ipfs_hash} | md5sum={generate_md5sum(target)}")
            if main_storage_id == StorageID.IPFS_GPG:
                # the created gpg file will be removed since it is already in ipfs
                targets.append(target)
        else:
            code_hash = folder
            if isinstance(code_hash, bytes):
                job.code_hashes.append(code_hash)
                job.code_hashes_str.append(code_hash.decode("utf-8"))

            # TODO: if it is ipfs
            # if isinstance(code_hash, bytes):
            #     code_hash = code_hash.decode("utf-8")
            # if len(code_hash) == 32:
            #     value = cfg.w3.toBytes(text=code_hash)
            #     job.code_hashes.append(value)
            #     job.code_hashes_str.append(value.decode("utf-8"))
            # else:
            #     job.code_hashes.append(ipfs_to_bytes32(code_hash))
            #     job.code_hashes_str.append(code_hash)

        # if idx != len(job.folders_to_share) - 1:
        #     log("-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-", "cyan")

    # requester inputs; for testing purposes
    job.price, *_ = job.cost(provider, requester)
    tx_hash = None  # initialized so the final return cannot raise NameError
    try:
        tx_hash = Ebb.submit_job(provider, key, job, requester=requester, required_confs=required_confs)
        if required_confs >= 1:
            tx_receipt = get_tx_status(tx_hash)
            if tx_receipt["status"] == 1:
                processed_logs = Ebb._eBlocBroker.events.LogJob().processReceipt(tx_receipt, errors=DISCARD)
                try:
                    if processed_logs:
                        log("job_info:", "bold yellow")
                        log(vars(processed_logs[0].args))

                    for target in targets:
                        if ".tar.gz.gpg" in str(target):
                            _remove(target)
                except IndexError:
                    log(f"E: Tx={tx_hash} is reverted")
    except QuietExit:
        pass
    except Exception as e:
        print_tb(e)

    return tx_hash
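
# End-to-end sketch (hypothetical values; the Job fields follow their usage in
# submit_ipfs above, not a confirmed constructor signature):
#
#   job = Job()
#   job.requester_addr = "0x..."  # requester account (hypothetical)
#   job.provider_addr = "0x..."   # provider account (hypothetical)
#   job.storage_ids = [StorageID.IPFS_GPG]
#   job.paths = [Path("/home/user/jobs/my_job")]
#   tx_hash = submit_ipfs(job, required_confs=1)
#   # on success, the LogJob event receipt carries the submitted job's info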