def sbatch_call(self):
    link = Link(self.results_data_folder, self.results_data_link)
    try:
        if len(self.registered_data_hashes) > 0:
            # if there are mounted folders, umount them first in order
            # to set the folder permissions
            link.umount(self.registered_data_hashes)

        # folder permissions should be applied before linking the
        # folders in case there is a read-only folder; file permissions
        # for the requester's folders should be reset
        give_rwe_access(self.requester_id, self.results_folder_prev)
        give_rwe_access(env.WHOAMI, self.results_folder_prev)
        # give_rwe_access(self.requester_id, self.requester_home)
        # give_rwe_access(env.WHOAMI, self.requester_home)
        if calculate_size(self.results_data_folder, _type="bytes") > 0:
            link.link_folders()

        if len(self.registered_data_hashes) > 0:
            link.registered_data(self.registered_data_hashes)

        self._sbatch_call()
    except Exception as e:
        print_tb(f"E: Failed to call _sbatch_call() function. {e}")
        raise e
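# `give_rwe_access` above grants a user read-write-execute permission on a folder
# tree. A minimal sketch of one way to do that with POSIX ACLs follows; it is an
# assumption about the helper's behavior, not the project's actual implementation.
import subprocess


def give_rwe_access_sketch(user: str, path: str) -> None:
    """Recursively grant rwx access on `path` to `user` via an ACL entry."""
    subprocess.check_call(["setfacl", "-R", "-m", f"u:{user}:rwx", path])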
def upload(self, source_code_hash, *_):
    with suppress(Exception):
        # on the first upload get_size() raises, hence the suppress;
        # otherwise skip re-uploading when the remote size already matches
        uploaded_file_size = eudat.get_size(f_name=f"{source_code_hash}/{self.patch_upload_name}")
        size_in_bytes = calculate_size(self.patch_file, _type="bytes")
        if uploaded_file_size == float(size_in_bytes):
            log(f"==> {self.patch_file} is already uploaded")
            return

    try:
        _data_transfer_out = calculate_size(self.patch_file)
        log(f"==> {br(source_code_hash)}.data_transfer_out={_data_transfer_out}MB")
        self.data_transfer_out += _data_transfer_out
        eudat.upload_results(
            self.encoded_share_tokens[source_code_hash],
            self.patch_upload_name,
            self.patch_folder,
            max_retries=5,
        )
    except Exception as e:
        raise e
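# A minimal standalone sketch of the size-based "skip re-upload" check used above,
# assuming an authenticated pyocclient `owncloud.Client` instance; the helper name
# and arguments are illustrative, not part of the project's API.
import os

import owncloud  # pyocclient


def is_already_uploaded(oc: owncloud.Client, remote_path: str, local_path: str) -> bool:
    """Return True when the remote copy exists and matches the local file size."""
    try:
        remote_size = int(oc.file_info(remote_path).get_size())
    except Exception:
        return False  # first upload: the remote file does not exist yet

    return remote_size == os.path.getsize(local_path)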
def initialize_folder(folder_to_share, requester_name) -> str:
    dir_path = os.path.dirname(folder_to_share)
    tar_hash, *_ = compress_folder(folder_to_share)
    tar_source = f"{dir_path}/{tar_hash}.tar.gz"
    try:
        config.oc.mkdir(f"{tar_hash}_{requester_name}")
    except Exception as e:
        if "405" not in str(e):
            if not os.path.exists(f"{env.OWNCLOUD_PATH}/{tar_hash}_{requester_name}"):
                try:
                    os.makedirs(f"{env.OWNCLOUD_PATH}/{tar_hash}_{requester_name}")
                except Exception as e:
                    raise e
            else:
                log("==> folder is already created")
        else:
            log("==> folder is already created")

    try:
        tar_dst = f"{tar_hash}_{requester_name}/{tar_hash}.tar.gz"
        log("## uploading into [green]EUDAT B2DROP[/green], this may take some time depending on the file size...")
        is_already_uploaded = False
        with suppress(Exception):
            # file_info() raises if the file is being created for the first time
            file_info = config.oc.file_info(f"./{tar_dst}")
            size = calculate_size(tar_source, _type="bytes")
            log(file_info, "bold")
            if float(file_info.attributes["{DAV:}getcontentlength"]) == size:
                # the file is already uploaded; detected by comparing file sizes
                log(f"## {tar_source} is already uploaded into [green]EUDAT B2DROP")
                is_already_uploaded = True

        if not is_already_uploaded:
            config.oc.put_file(f"./{tar_dst}", tar_source)
            os.remove(tar_source)
    except Exception as e:
        if type(e).__name__ == "HTTPResponseError":
            try:
                shutil.copyfile(tar_source, f"{env.OWNCLOUD_PATH}/{tar_dst}")
            except Exception as e:
                raise e
        else:
            raise Exception("oc could not connect in order to upload the file")  # noqa

    return tar_hash
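# A hedged sketch of how a content-addressed tarball (as `compress_folder` above is
# assumed to produce) could be built: the archive is named after the MD5 digest of
# its own bytes. This is only an illustration, not the project's actual
# `compress_folder` implementation.
import hashlib
import os
import tarfile


def compress_folder_sketch(folder_to_share: str) -> str:
    """Create <dir>/<md5>.tar.gz next to the folder and return the hash."""
    dir_path = os.path.dirname(folder_to_share)
    tmp_tar = os.path.join(dir_path, "tmp.tar.gz")
    with tarfile.open(tmp_tar, "w:gz") as tar:
        tar.add(folder_to_share, arcname=os.path.basename(folder_to_share))

    with open(tmp_tar, "rb") as f:
        tar_hash = hashlib.md5(f.read()).hexdigest()

    os.rename(tmp_tar, os.path.join(dir_path, f"{tar_hash}.tar.gz"))
    return tar_hash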
def upload(self, key, is_job_key):
    """Upload generated result into gdrive.

    :param key: key of the shared gdrive file
    :returns: True if upload is successful
    """
    meta_data = {}  # initialized so the except-block message cannot raise a NameError
    try:
        if not is_job_key:
            meta_data = gdrive.get_data_key_ids(self.results_folder_prev)
            key = meta_data[key]

        cmd = [env.GDRIVE, "info", "--bytes", key, "-c", env.GDRIVE_METADATA]
        gdrive_info = subprocess_call(cmd, 5, sleep_time=30)
    except Exception as e:
        raise Exception(
            f"{WHERE(1)} E: {key} does not have a match. meta_data={meta_data}. {e}"
        ) from e

    mime_type = gdrive.get_file_info(gdrive_info, "Mime")
    log(f"mime_type={mime_type}")
    self.data_transfer_out += calculate_size(self.patch_file)
    log(f"data_transfer_out={self.data_transfer_out} MB =>"
        f" rounded={int(self.data_transfer_out)} MB")
    if "folder" in mime_type:
        cmd = [env.GDRIVE, "upload", "--parent", key, self.patch_file, "-c", env.GDRIVE_METADATA]
    elif "gzip" in mime_type or "/zip" in mime_type:
        cmd = [env.GDRIVE, "update", key, self.patch_file, "-c", env.GDRIVE_METADATA]
    else:
        raise Exception("Files could not be uploaded")

    try:
        log(subprocess_call(cmd, 5))
    except Exception as e:
        print_tb(e)
        raise Exception("E: gdrive could not upload the file") from e
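# A minimal sketch of extracting a field such as "Mime" from `gdrive info` output,
# which `gdrive.get_file_info` above is assumed to do; the output is expected to
# contain lines of the form "Mime: application/zip". The function name is
# illustrative only.
def get_file_info_sketch(gdrive_info: str, _type: str) -> str:
    """Return the value of the `<_type>:` line from `gdrive info` output."""
    for line in gdrive_info.splitlines():
        if line.startswith(f"{_type}:"):
            return line.split(":", 1)[1].strip()

    raise KeyError(f"{_type} not found in gdrive info output")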
def gdrive_download_folder(self, name, key, source_code_hash, _id, cache_folder) -> bool:
    log(f"{WHERE(1)}")
    if self._is_cached(source_code_hash, _id):
        return True

    is_continue = False
    with suppress(Exception):
        output = self.job_infos[0]["storage_duration"][_id]
        is_continue = True

    if is_continue and not self.job_infos[0]["is_cached"][source_code_hash] and not output:
        log("## Downloaded as temporary data file", "bold yellow")
        self.folder_path_to_download[source_code_hash] = self.results_folder_prev
    else:
        self.folder_path_to_download[source_code_hash] = cache_folder
        # self.assign_folder_path_to_download(_id, source_code_hash, cache_folder)

    log(f"## downloading => {key}\nPath to download => {self.folder_path_to_download[source_code_hash]}")
    if self.folder_type_dict[source_code_hash] == "folder":
        try:
            folder = self.folder_path_to_download[source_code_hash]
            subprocess_call(
                ["gdrive", "download", "--recursive", key, "--force", "--path", folder],
                10,
            )
        except Exception as e:
            raise e

        downloaded_folder_path = f"{self.folder_path_to_download[source_code_hash]}/{name}"
        if not os.path.isdir(downloaded_folder_path):
            # check that the folder exists before the move operation
            raise Exception(f"E: Folder ({downloaded_folder_path}) is not downloaded successfully")

        self.data_transfer_in_requested = calculate_size(downloaded_folder_path)
        logging.info(
            f"data_transfer_in_requested={self.data_transfer_in_requested} MB | "
            f"Rounded={int(self.data_transfer_in_requested)} MB"
        )
    else:
        try:
            folder = self.folder_path_to_download[source_code_hash]
            cmd = ["gdrive", "download", key, "--force", "--path", folder]
            subprocess_call(cmd, 10)
        except Exception as e:
            raise e

        file_path = f"{self.folder_path_to_download[source_code_hash]}/{name}"
        if not os.path.isfile(file_path):
            raise Exception(f"{WHERE(1)} E: File {file_path} is not downloaded successfully")

        p1 = subprocess.Popen(["ls", "-ln", file_path], stdout=subprocess.PIPE)
        p2 = subprocess.Popen(["awk", "{print $5}"], stdin=p1.stdout, stdout=subprocess.PIPE)
        p1.stdout.close()  # type: ignore
        # `ls -ln | awk '{print $5}'` yields the downloaded file's size in bytes
        self.data_transfer_in_requested = byte_to_mb(p2.communicate()[0].decode("utf-8").strip())
        logging.info(
            f"data_transfer_in_requested={self.data_transfer_in_requested} MB |"
            f" Rounded={int(self.data_transfer_in_requested)} MB"
        )

    return True
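# The `ls -ln | awk` pipeline above only extracts the downloaded file's size in
# bytes. A hedged, pure-Python equivalent (assuming the same MB convention as the
# `byte_to_mb` helper, i.e. dividing by 2**20) could be:
import os


def downloaded_size_mb(file_path: str) -> float:
    """Return the file size in MB, matching the value `ls -ln` reports in column 5."""
    size_in_bytes = os.path.getsize(file_path)
    return size_in_bytes / (1024 * 1024)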
def set_config(self, fn):
    if not os.path.isfile(fn):
        log(f"E: {fn} file does not exist")
        raise QuietExit

    self.cfg = Yaml(fn)
    self.requester_addr = self.cfg["config"]["requester_address"]
    self.provider_addr = self.cfg["config"]["provider_address"]
    self.source_code_storage_id = storage_id = self.cfg["config"]["source_code"]["storage_id"]
    self.storage_ids.append(STORAGE_IDs[storage_id])
    if storage_id == StorageID.NONE:
        self.add_empty_data_item()
    else:
        cache_type = self.cfg["config"]["source_code"]["cache_type"]
        self.cache_types.append(CACHE_TYPES[cache_type])

    source_code_path = Path(os.path.expanduser(self.cfg["config"]["source_code"]["path"]))
    size_mb = calculate_size(source_code_path)
    self.paths.append(source_code_path)
    self.data_transfer_ins.append(size_mb)
    self.storage_hours.append(self.cfg["config"]["source_code"]["storage_hours"])
    self.data_prices_set_block_numbers.append(0)
    duplicate_path_check = {}
    data_file_keys = []
    # data files are re-ordered: if a registered data item is requested from the
    # provider (StorageID.NONE), it is moved to the end of the list. This helps
    # during patching of the data files, since the data files with storage_id
    # NONE at the end of the list can be ignored
    for data_key in reversed(self.cfg["config"]["data"]):
        if "path" in self.cfg["config"]["data"][data_key]:
            _path = self.cfg["config"]["data"][data_key]["path"]
            with suppress(Exception):
                if duplicate_path_check[_path]:
                    _exit(f"E: {_path} exists as duplicate item")

            duplicate_path_check[_path] = True

        if "hash" in self.cfg["config"]["data"][data_key]:
            data_file_keys.append(data_key)
        else:
            # priority is given to data that comes from a local folder
            data_file_keys.insert(0, data_key)

    for key in data_file_keys:
        is_data_hash = False
        if "hash" in self.cfg["config"]["data"][key]:
            # process the registered data of the provider
            data_hash = self.cfg["config"]["data"][key]["hash"]
            if len(data_hash) == 32:
                data_hash = data_hash.encode()

            self.paths.append(data_hash)
            self.registered_data_files.append(data_hash)
            self.add_empty_data_item()
            self.is_registered_data_requested[data_hash] = True
            is_data_hash = True
        else:
            storage_id = self.cfg["config"]["data"][key]["storage_id"]
            self.storage_ids.append(STORAGE_IDs[storage_id])
            if storage_id == StorageID.NONE:
                self.add_empty_data_item()
            else:
                cache_type = self.cfg["config"]["data"][key]["cache_type"]
                self.cache_types.append(CACHE_TYPES[cache_type])

            path = Path(os.path.expanduser(self.cfg["config"]["data"][key]["path"]))
            size_mb = calculate_size(path)
            self.paths.append(path)
            self.data_paths.append(path)
            self.data_transfer_ins.append(size_mb)
            self.storage_hours.append(self.cfg["config"]["data"][key]["storage_hours"])
            self.data_prices_set_block_numbers.append(0)

        if is_data_hash and not is_data_registered(self.provider_addr, data_hash):
            raise Exception(f"## requested({data_hash}) data is not registered to the provider")

    self.cores = []
    self.run_time = []
    for key in self.cfg["config"]["jobs"]:
        self.cores.append(self.cfg["config"]["jobs"][key]["cores"])
        self.run_time.append(self.cfg["config"]["jobs"][key]["run_time"])

    self.data_transfer_out = self.cfg["config"]["data_transfer_out"]
    if source_code_path:
        self.tmp_dir = source_code_path.parent.absolute()
    else:
        self.tmp_dir = Path(os.path.expanduser(self.cfg["config"]["tmp_dir"]))

    self.set_cache_types(self.cache_types)
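# For reference, a hedged sketch of the YAML layout that `set_config` above appears
# to expect. The field names are inferred from the keys it reads; the values are
# made-up illustration data, not taken from the project.
#
# config:
#   requester_address: "0x..."
#   provider_address: "0x..."
#   source_code:
#     storage_id: ipfs
#     cache_type: public
#     path: /path/to/source_code
#     storage_hours: 0
#   data:
#     data1:
#       hash: <32-character hash of the provider's registered data>
#     data2:
#       storage_id: ipfs
#       cache_type: public
#       path: /path/to/data_folder
#       storage_hours: 1
#   data_transfer_out: 1
#   jobs:
#     job1:
#       cores: 1
#       run_time: 1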
def run(self) -> bool:
    self.start_time = time.time()
    if cfg.IS_THREADING_ENABLED:
        self.thread_log_setup()

    run_ipfs_daemon()
    log(f"{br(get_date())} Job's source code has been sent through ", "bold cyan", end="")
    if self.cloudStorageID[0] == StorageID.IPFS:
        log("[bold green]IPFS")
    else:
        log("[bold green]IPFS_GPG")

    if not is_ipfs_on():
        return False

    log(f"==> is_hash_locally_cached={cfg.ipfs.is_hash_locally_cached(self.job_key)}")
    if not os.path.isdir(self.results_folder):
        os.makedirs(self.results_folder)

    _remove(f"{self.results_folder}/{self.job_key}")
    try:
        self.check_ipfs(self.job_key)
    except:
        return False

    self.registered_data_hashes = []
    for idx, source_code_hash in enumerate(self.code_hashes):
        if self.cloudStorageID[idx] == StorageID.NONE:
            self.registered_data_hashes.append(source_code_hash)  # GOTCHA
        else:
            ipfs_hash = bytes32_to_ipfs(source_code_hash)
            if ipfs_hash not in self.ipfs_hashes:
                try:
                    # the job_key as a data hash may already have been added to the list
                    self.check_ipfs(ipfs_hash)
                except:
                    return False

    initial_folder_size = calculate_size(self.results_folder)
    for idx, ipfs_hash in enumerate(self.ipfs_hashes):
        # at this point the script knows that the provided IPFS hashes exist online
        is_hashed = False
        log(f"## attempting to get IPFS file: {ipfs_hash} ... ", end="")
        if cfg.ipfs.is_hash_locally_cached(ipfs_hash):
            is_hashed = True
            log(ok("already cached"))
        else:
            log()

        if idx == 0:
            target = self.results_folder
        else:
            # "_" is prepended to the filename for the case of: $ ipfs get <ipfs_hash>
            target = f"{self.results_data_folder}/_{ipfs_hash}"
            mkdir(target)

        is_storage_paid = False  # TODO: should be set beforehand by user input
        cfg.ipfs.get(ipfs_hash, target, is_storage_paid)
        if idx > 0:
            # https://stackoverflow.com/a/31814223/2402577
            dst_filename = os.path.join(self.results_data_folder, os.path.basename(ipfs_hash))
            if os.path.exists(dst_filename):
                _remove(dst_filename)

            shutil.move(target, dst_filename)
            target = dst_filename

        if self.cloudStorageID[idx] == StorageID.IPFS_GPG:
            cfg.ipfs.decrypt_using_gpg(f"{target}/{ipfs_hash}", target)

        try:
            _git.initialize_check(target)
        except Exception as e:
            raise e

        if not is_hashed:
            folder_size = calculate_size(self.results_folder)
            self.data_transfer_in_to_download_mb += folder_size - initial_folder_size
            initial_folder_size = folder_size

        if idx == 0 and not self.check_run_sh():
            self.complete_refund()
            return False

    log(f"==> data_transfer_in={self.data_transfer_in_to_download_mb} MB | "
        f"rounded={int(self.data_transfer_in_to_download_mb)} MB")
    return self.sbatch_call()
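# A hedged sketch of what a `bytes32_to_ipfs` conversion (used above) could look
# like: a CIDv0 hash ("Qm...") is the base58 encoding of the multihash prefix
# 0x12 0x20 (sha2-256, 32 bytes) followed by the raw 32-byte digest. This shows the
# general technique and is not necessarily the project's own helper.
import base58  # assumed third-party dependency


def bytes32_to_ipfs_sketch(bytes32_hash: bytes) -> str:
    """Convert a raw 32-byte sha2-256 digest into a base58 CIDv0 string."""
    return base58.b58encode(b"\x12\x20" + bytes32_hash).decode("utf-8")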