def scontrol_update(self, job_core_num, sbatch_file_path, time_limit):
    """Submit a slurm job and set its time limit, retrying around the DB lock.

    Prevents the `scontrol update` locked exception; scontrol generates:
    ``Job update not available right now, the DB index is being set, try
    again in a bit for job 5`` — the retrying ``subprocess_call`` (10
    attempts, 10 s apart) works around that transient state.

    :param job_core_num: core count forwarded to ``submit_slurm_job``
    :param sbatch_file_path: path of the sbatch file to submit
    :param time_limit: value assigned to the job's ``TimeLimit``
    :returns: the slurm job id of the submitted job (as a string)
    """
    try:
        _slurm_job_id = self.submit_slurm_job(job_core_num, sbatch_file_path)
        # sbatch prints "Submitted batch job <id>"; the id is the 4th token
        slurm_job_id = _slurm_job_id.split()[3]
        cmd = ["scontrol", "update", f"jobid={slurm_job_id}", f"TimeLimit={time_limit}"]
        subprocess_call(cmd, attempt=10, sleep_time=10)
        return slurm_job_id
    except Exception as e:
        print_tb(e)
        raise  # bare raise preserves the original traceback
def upload(self, key, is_job_key):
    """Upload generated result into gdrive.

    :param key: key of the shared gdrive file
    :param is_job_key: when False, *key* is first resolved through the
        ``meta_data.json`` mapping stored under ``results_folder_prev``
    :raises Exception: if the key has no match or the upload fails
    :returns: True if upload is successful
    """
    # NOTE: meta_data must exist before the try block; otherwise, when
    # is_job_key is True and subprocess_call fails, the except-handler's
    # f-string would raise NameError and mask the real error.
    meta_data = None
    try:
        if not is_job_key:
            meta_data = gdrive.get_data_key_ids(self.results_folder_prev)
            key = meta_data[key]

        cmd = [env.GDRIVE, "info", "--bytes", key, "-c", env.GDRIVE_METADATA]
        gdrive_info = subprocess_call(cmd, 5, sleep_time=30)
    except Exception as e:
        raise Exception(
            f"{WHERE(1)} E: {key} does not have a match. meta_data={meta_data}. {e}"
        ) from e

    mime_type = gdrive.get_file_info(gdrive_info, "Mime")
    log(f"mime_type={mime_type}")
    self.data_transfer_out += calculate_size(self.patch_file)
    log(f"data_transfer_out={self.data_transfer_out} MB =>"
        f" rounded={int(self.data_transfer_out)} MB")
    if "folder" in mime_type:
        # target is a folder: add the patch file under it
        cmd = [env.GDRIVE, "upload", "--parent", key, self.patch_file, "-c", env.GDRIVE_METADATA]
    elif "gzip" in mime_type or "/zip" in mime_type:
        # target is an archive: replace its content in place
        cmd = [env.GDRIVE, "update", key, self.patch_file, "-c", env.GDRIVE_METADATA]
    else:
        raise Exception("Files could not be uploaded")

    try:
        log(subprocess_call(cmd, 5))
    except Exception as e:
        print_tb(e)
        raise Exception("E: gdrive could not upload the file") from e
def pre_data_check(self, key):
    """Verify the requested download size and fetch gdrive info for *key*.

    :param key: gdrive key to inspect
    :raises Exception: if the requested download size exceeds the paid amount
    :returns: the ``gdrive info`` output for *key*
    """
    if self.data_transfer_in_to_download > self.data_transfer_in_requested:
        # TODO: full refund
        raise Exception(
            "Requested size to download the source_code and data files is greater than the given amount"
        )

    try:
        cmd = ["gdrive", "info", "--bytes", key, "-c", env.GDRIVE_METADATA]
        return subprocess_call(cmd, 1)
    except Exception as e:
        # TODO: gdrive list --query "sharedWithMe"
        print_tb(e)
        raise  # bare raise preserves the original traceback
def _ipfs_stat(self, ipfs_hash, _is_ipfs_on=True):
    """Return stats of the give IPFS hash.

    This function *may* run for an indetermined time.

    Returns a dict with the size of the block with the given hash.
    """
    if _is_ipfs_on and not is_ipfs_on():
        raise IpfsNotConnected

    timeout_flag = f"--timeout={cfg.IPFS_TIMEOUT}s"
    status_msg = f"$ ipfs object stat {ipfs_hash} --timeout={cfg.IPFS_TIMEOUT}s"
    cmd = ["ipfs", "object", "stat", ipfs_hash, timeout_flag]
    # spinner shown while the (possibly long-running) stat call executes
    with cfg.console.status(status_msg):
        return subprocess_call(cmd)
def gdrive_download_folder(self, name, key, source_code_hash, _id, cache_folder) -> bool:
    """Download a gdrive folder or file into the cache.

    :param name: file/folder name on gdrive
    :param key: gdrive key to download
    :param source_code_hash: hash identifying the source-code/data item
    :param _id: index of the item within the job
    :param cache_folder: default destination when the item is cached
    :raises Exception: if the download did not produce the expected path
    :returns: True when the item is already cached or downloaded successfully
    """
    log(f"{WHERE(1)}")
    if self._is_cached(source_code_hash, _id):
        return True

    is_continue = False
    with suppress(Exception):
        # storage_duration may be missing; suppress and fall through
        output = self.job_infos[0]["storage_duration"][_id]
        is_continue = True

    if is_continue and not self.job_infos[0]["is_cached"][source_code_hash] and not output:
        log("## Downloaded as temporary data file", "bold yellow")
        self.folder_path_to_download[source_code_hash] = self.results_folder_prev
    else:
        self.folder_path_to_download[source_code_hash] = cache_folder
        # self.assign_folder_path_to_download(_id, source_code_hash, cache_folder)

    log(f"## downloading => {key}\nPath to download => {self.folder_path_to_download[source_code_hash]}")
    folder = self.folder_path_to_download[source_code_hash]
    if self.folder_type_dict[source_code_hash] == "folder":
        subprocess_call(
            ["gdrive", "download", "--recursive", key, "--force", "--path", folder],
            10,
        )
        downloaded_folder_path = f"{folder}/{name}"
        if not os.path.isdir(downloaded_folder_path):
            # check before move operation
            raise Exception(f"E: Folder ({downloaded_folder_path}) is not downloaded successfully")

        self.data_transfer_in_requested = calculate_size(downloaded_folder_path)
    else:
        cmd = ["gdrive", "download", key, "--force", "--path", folder]
        subprocess_call(cmd, 10)
        file_path = f"{folder}/{name}"
        if not os.path.isfile(file_path):
            raise Exception(f"{WHERE(1)} E: File {file_path} is not downloaded successfully")

        # os.path.getsize replaces the former `ls -ln | awk '{print $5}'`
        # subprocess pipeline; returns the downloaded file's size in bytes
        self.data_transfer_in_requested = byte_to_mb(os.path.getsize(file_path))

    logging.info(
        f"data_transfer_in_requested={self.data_transfer_in_requested} MB | "
        f"Rounded={int(self.data_transfer_in_requested)} MB"
    )
    # declared return type is bool; signal success explicitly (was implicit None)
    return True
def get_data(self, key, _id, is_job_key=False):
    """Fetch a gdrive item (gzip archive or folder) and unpack it locally.

    :param key: gdrive key of the item
    :param _id: index of the item within the job (0 == source code)
    :param is_job_key: True when *key* refers to the job itself
    :raises Exception: when the mime type is neither folder nor gzip,
        or any download/untar step fails
    :returns: path of the target folder the data was unpacked into
    """
    try:
        mime_type, name = self.get_data_init(key, _id, is_job_key)
    except Exception as e:
        print_tb(e)
        raise

    if is_job_key:
        gdrive_info = self.pre_data_check(key)
        name = gdrive.get_file_info(gdrive_info, "Name")
        mime_type = gdrive.get_file_info(gdrive_info, "Mime")

    # folder is already stored by its source_code_hash
    source_code_hash = name.replace(".tar.gz", "")
    log(f"==> name={name}")
    log(f"==> mime_type=[magenta]{mime_type}")
    if _id == 0:
        # source code folder, ignore downloading result-*
        name = f"{name}.tar.gz"
        try:
            output = gdrive.get_file_id(key)
        except Exception as e:
            print_tb(e)
            raise

        key = echo_grep_awk(output, name, "1")
        mime_type = "gzip"

    if "gzip" in mime_type:
        try:
            cmd = ["gdrive", "info", "--bytes", key, "-c", env.GDRIVE_METADATA]
            gdrive_info = subprocess_call(cmd, 10)
        except Exception as e:
            print_tb(e)
            raise

        source_code_hash = gdrive.get_file_info(gdrive_info, "Md5sum")
        self.md5sum_dict[key] = source_code_hash
        log(f"==> md5sum={self.md5sum_dict[key]}")
        # recieved job is in folder tar.gz
        self.folder_type_dict[source_code_hash] = "gzip"
        try:
            self.cache(_id, name, source_code_hash, key, is_job_key)
        except Exception as e:
            print_tb(e)
            raise

        if is_job_key:
            target = self.results_folder
        else:
            target = f"{self.results_data_folder}/{source_code_hash}"
            mkdir(target)

        try:
            cache_folder = self.folder_path_to_download[source_code_hash]
            untar(f"{cache_folder}/{name}", target)
            # FIX: this call was dead code after the `return` in the original;
            # the folder branch performs the same cleanup, so run it before
            # returning (remove_downloaded_file decides whether to delete)
            self.remove_downloaded_file(source_code_hash, _id, f"{cache_folder}/{name}")
            return target
        except Exception as e:
            print_tb(e)
            raise
    elif "folder" in mime_type:
        #: received job is in folder format
        self.folder_type_dict[source_code_hash] = "folder"
        self.cache(_id, name, source_code_hash, key, is_job_key)
        cache_folder = self.folder_path_to_download[source_code_hash]
        cmd = [
            "rsync",
            "-avq",
            "--partial-dir",
            "--omit-dir-times",
            f"{cache_folder}/{name}/",
            self.results_folder,
        ]
        try:
            output = run(cmd)
        except Exception as e:
            print_tb(e)
            raise

        self.remove_downloaded_file(source_code_hash, _id, f"{cache_folder}/{name}/")
        tar_file = f"{self.results_folder}/{name}.tar.gz"
        try:
            untar(tar_file, self.results_folder)
            _remove(tar_file)
            # FIX: the original returned `target`, which is never assigned in
            # this branch (NameError); rsync/untar unpack into results_folder
            return self.results_folder
        except Exception as e:
            print_tb(e)
            raise
    else:
        raise Exception("Neither folder or gzip type given.")
def size(key, mime_type, folder_name, gdrive_info, results_folder_prev, code_hashes, is_cached):
    """Compute the total and to-download sizes of a gdrive job's data set.

    Downloads the meta_data folder, verifies each item's md5sum against the
    user-provided ``code_hashes``, and sums the byte sizes reported by gdrive.

    :param key: gdrive key of the data folder
    :param mime_type: mime type of *key* (must contain "folder")
    :param folder_name: name of the source-code folder (without .tar.gz)
    :param gdrive_info: placeholder; overwritten by fresh ``gdrive info`` output
    :param results_folder_prev: local folder the meta_data is downloaded into
    :param code_hashes: user-provided md5sums (bytes), index 0 is the source code
    :param is_cached: map of hash -> bool; cached items are excluded from download size
    :raises Exception: on mime-type/md5sum mismatch or missing meta_data
    :returns: (size to download in MB, data_key_dict, source_code_key)
    """
    source_code_key = None
    size_to_download = 0
    if "folder" not in mime_type:
        raise Exception(f"E: mime_type={mime_type} does not contain 'folder'")

    try:
        output = get_file_id(key)
        log(f"==> data_id=[magenta]{key}")
        log(output, "bold green")
        data_files_id = fetch_grive_output(output, "meta_data.json")
        # key for the source_code elimination output*.tar.gz files
        source_code_key = fetch_grive_output(output, f"{folder_name}.tar.gz")
        if not data_files_id:
            raise Exception("E: data_files_id is empty")

        cmd = [
            "gdrive",
            "download",
            "--recursive",
            data_files_id,  # first id is meta_data
            "--force",
            "--path",
            results_folder_prev,
        ]
        output = subprocess_call(cmd, 10)
        print(output)
        cmd = [
            "gdrive",
            "info",
            "--bytes",
            source_code_key,
            "-c",
            env.GDRIVE_METADATA,
        ]
        gdrive_info = subprocess_call(cmd, 10)
    except Exception as e:
        print_tb(e)
        # TODO: gdrive list --query "sharedWithMe"
        raise

    md5sum = get_file_info(gdrive_info, "Md5sum")
    _source_code_hash = code_hashes[0].decode("utf-8")
    if md5sum != _source_code_hash:
        # checks md5sum obtained from gdrive and given by the user
        raise Exception(f"E: md5sum does not match with the provided data {source_code_key}")

    log(f"SUCCESS on folder={md5sum}", "bold green")
    byte_size = int(get_file_info(gdrive_info, "Size"))
    log(f"## code_hashes[0] == {_source_code_hash} | size={byte_size} bytes")
    if not is_cached[code_hashes[0].decode("utf-8")]:
        size_to_download += byte_size

    meta_data = get_data_key_ids(results_folder_prev)
    data_key_dict = {}
    if len(meta_data) > 1:
        idx = 0
        for k, v in meta_data.items():
            if idx == 0:
                # first item is for the source-code itself
                _key = str(v)
                output = get_file_id(_key)
                data_key = fetch_grive_output(output, f"{k}.tar.gz")
                cmd = ["gdrive", "info", "--bytes", data_key, "-c", env.GDRIVE_METADATA]
                gdrive_info = subprocess_call(cmd, 10)
                log(f" * gdrive_info for [green]{k}[/green]:")
                log(gdrive_info, "bold yellow")
                idx += 1
            else:
                # should start from the first index
                _key = str(v)
                output = get_file_id(_key)
                data_key = fetch_grive_output(output, f"{k}.tar.gz")
                cmd = ["gdrive", "info", "--bytes", data_key, "-c", env.GDRIVE_METADATA]
                gdrive_info = subprocess_call(cmd, 10)
                md5sum = get_file_info(gdrive_info, _type="Md5sum")
                log(f" * gdrive_info for [green]{k}[/green]:")
                log(gdrive_info, "bold yellow")
                given_code_hash = code_hashes[idx].decode("utf-8")
                log(f"==> given_code_hash={given_code_hash} idx={idx}")
                if md5sum != given_code_hash:
                    # checks md5sum obtained from gdrive and given by the user
                    raise Exception(
                        f"E: md5sum does not match with the provided data{br(idx)}\n"
                        f"md5sum={md5sum} | given={given_code_hash}")

                data_key_dict[md5sum] = data_key
                _size = int(get_file_info(gdrive_info, "Size"))
                log(f"==> code_hashes{br(idx)} == {code_hashes[idx].decode('utf-8')} size={_size} bytes")
                byte_size += _size
                if not is_cached[code_hashes[idx].decode("utf-8")]:
                    size_to_download += _size

        if data_key_dict:
            data_link_file = f"{results_folder_prev}/meta_data_link.json"
            with open(data_link_file, "w") as f:
                json.dump(data_key_dict, f)
        else:
            raise Exception("E: Something is wrong. data_key_dict is empty")

    output = byte_to_mb(size_to_download)
    logging.info(
        f"Total_size={byte_size} bytes | size to download={size_to_download} bytes => {output} MB"
    )
    return output, data_key_dict, source_code_key