def add_all(repo=None):
    """Stage and commit all changes in the Git working tree."""
    try:
        if not repo:
            repo = git.Repo(".", search_parent_directories=True)

        log("all files in the entire working tree are updated in the Git repository ", end="")
        repo.git.add(A=True)
        log(ok())
        try:
            #: git diff HEAD --name-only | wc -l
            changed_file_len = len(
                repo.index.diff("HEAD", ignore_blank_lines=True, ignore_space_at_eol=True)
            )
        except Exception:  # on the very first commit HEAD does not exist yet
            # splitlines() (rather than split("\n")) yields [] for empty diff
            # output, so an empty staging area is not counted as one file
            changed_file_len = len(
                repo.git.diff(
                    "--cached", "--ignore-blank-lines", "--ignore-space-at-eol", "--name-only"
                ).splitlines()
            )

        if changed_file_len > 0:
            log("Record changes to the repository ", end="")
            repo.git.commit("-m", "update")
            log(ok())
    except Exception as e:
        print_tb(e)
        raise e
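# Usage sketch: calling add_all() from anywhere inside a working tree is
# enough, since it discovers the repository itself; the explicit form is
# shown only for clarity.
#
#   add_all()                                            # repo found from cwd
#   add_all(git.Repo("/path/to/repo", search_parent_directories=True))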
def is_repo(folders):
    """Ensure each given folder is a Git repository; apply `git init` if not."""
    for folder in folders:
        if not isinstance(folder, bytes):
            with cd(folder):
                if not is_initialized(folder):
                    log(f"warning: .git does not exist in {folder}. Applying: git init ", end="")
                    run(["git", "init", "--initial-branch=master"])
                    log(ok())
def initialize_check(path):
    """Validate that a .git/ folder exists within the target folder; initialize one if missing."""
    with cd(path):
        if not is_initialized(path):
            try:
                log(f"## git_repo={path}")
                log("Creating an empty Git repository ", end="")
                run(["git", "init", "--initial-branch=master"])
                log(ok())
                add_all()
            except Exception as e:
                log(f"E: {e}")
                raise e
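# `is_initialized`, used by is_repo() and initialize_check() above, is defined
# elsewhere in the project; a minimal sketch, under the assumption that
# detecting a `.git` directory in the target folder is all it needs to do:
import os

def is_initialized(path) -> bool:
    """Return True if `path` already contains a .git directory."""
    return os.path.isdir(os.path.join(path, ".git"))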
def eudat_download_folder(self, results_folder_prev, folder_name):
    """Download the corresponding folder from EUDAT.

    Always assumes the job is sent as a .tar.gz file.
    """
    # TODO: verify that the hash of the downloaded file is correct
    cached_tar_file = f"{results_folder_prev}/{folder_name}.tar.gz"
    log("#> downloading [green]output.zip[/green] for:", end="")
    log(f"{folder_name} => {cached_tar_file} ", "bold")
    key = folder_name
    share_key = f"{folder_name}_{self.requester_id[:16]}"
    for attempt in range(1):  # single attempt; widen the range to retry
        try:
            log("## Trying [blue]wget[/blue] approach...")
            token = self.share_id[share_key]["share_token"]
            if token:
                download_fn = f"{cached_tar_file.replace('.tar.gz', '')}_{self.requester_id}.download"
                cmd = [
                    "wget",
                    "-O",
                    download_fn,
                    "-c",
                    f"https://b2drop.eudat.eu/s/{token}/download",
                    "-q",
                    "--show-progress",
                    "--progress=bar:force",
                ]
                log(" ".join(cmd), is_code=True, color="yellow")
                run(cmd)
                with cd(results_folder_prev):
                    run(["unzip", "-o", "-j", download_fn])
                    _remove(download_fn)

                self.tar_downloaded_path[folder_name] = cached_tar_file
                log(f"## download file from eudat {ok()}")
                return
        except Exception:
            log("E: Failed to download eudat file via wget.\nTrying config.oc.get_file() approach...")

        if config.oc.get_file(f"/{key}/{folder_name}.tar.gz", cached_tar_file):
            self.tar_downloaded_path[folder_name] = cached_tar_file
            log(ok())
            return
        else:
            logging.error(f"E: Something is wrong, oc could not retrieve the file [attempt:{attempt}]")

    raise Exception("Eudat download error")
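# The TODO above asks for verifying the downloaded archive; a minimal
# hashlib-based sketch (where the expected digest comes from is left open,
# since the source does not specify it):
import hashlib

def file_md5(path: str) -> str:
    """Return the md5 hex digest of the file at `path`, read in 1 MiB chunks."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)

    return h.hexdigest()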
def get_tx_status(tx_hash, is_silent=False) -> TxReceipt:
    """Return the status of the transaction."""
    if not tx_hash:
        raise Exception("warning: tx_hash is empty")

    if isinstance(tx_hash, TransactionReceipt):
        tx_hash = tx_hash.txid

    if not is_silent:
        log(f"tx_hash={tx_hash}", "bold")

    try:
        tx_receipt = cfg.Ebb._wait_for_transaction_receipt(tx_hash, is_silent=is_silent)
        _tx_receipt = dict(tx_receipt)
        if not is_silent:
            with suppress(Exception):
                del _tx_receipt["logsBloom"]

            log("tx=", "bold", end="")
            log(_tx_receipt, max_depth=1)
            for idx, _log in enumerate(_tx_receipt["logs"]):  # all logs fired under the tx
                _log = dict(_log)
                with suppress(Exception):
                    del _log["data"]

                log(f"log_{idx}=", "bold blue", end="")
                log(_log)

            log("#> Is transaction successfully deployed? ", end="")

        if tx_receipt["status"] == 1:
            if not is_silent:
                log(ok())
        else:
            if not is_silent:
                log()

            raise Exception("E: tx is reverted")

        return tx_receipt
    except Timeout as e:
        log(str(e))
        raise e
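# Usage sketch; the hash below is a placeholder, not a real transaction:
#
#   receipt = get_tx_status("0x...", is_silent=True)
#   assert receipt["status"] == 1  # get_tx_status raises if the tx reverted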
def attemp_get_job_info(self):
    """Poll the blockchain until the job's start_code tx becomes visible."""
    is_print = True
    sleep_time = 30
    for attempt in range(10):
        if self.job_info["stateCode"] == state.code["RUNNING"]:
            # it will come here eventually once setJob() is deployed; wait
            # until its values are updated on the blockchain
            log("## job has been started")
            return

        if self.job_info["stateCode"] == state.code["COMPLETED"]:
            # detects an error on the slurm side
            log("warning: job is already completed and its money is received")
            self.get_job_info()
            raise QuietExit

        try:
            self.job_info = Ebb.get_job_info(
                env.PROVIDER_ID,
                self.job_key,
                self.index,
                self.job_id,
                self.received_block_number,
                is_print,
            )
            is_print = False
        except Exception as e:
            print_tb(e)

        # sleep here so this loop does not keep the CPU busy; the start_code
        # tx may be deployed late into the blockchain
        log(
            f"==> {br(attempt)} start_code tx of the job is not obtained yet, "
            f"waiting for {sleep_time} seconds to pass...",
            end="",
        )
        sleep(sleep_time)
        log(ok())

    log("E: failed all the attempts, abort")
    sys.exit(1)
def _wait_for_transaction_receipt(self, tx_hash, compact=False, is_silent=False) -> TxReceipt:
    """Wait until the tx is deployed."""
    if isinstance(tx_hash, TransactionReceipt):
        tx_hash = tx_hash.txid

    tx_receipt = None
    attempt = 0
    poll_latency = 3
    if not is_silent:
        log(f"## waiting for the transaction({tx_hash}) receipt... ", end="")

    while True:
        try:
            tx_receipt = cfg.w3.eth.get_transaction_receipt(tx_hash)
        except TransactionNotFound as e:
            log()
            log(f"warning: {e}")
        except Exception as e:
            print_tb(e)
            tx_receipt = None

        if tx_receipt and tx_receipt["blockHash"]:
            break

        if not is_silent:
            log()
            log(f"## attempt={attempt} | sleeping_for={poll_latency} seconds ", end="")

        attempt += 1
        time.sleep(poll_latency)

    if not is_silent:
        log(ok())

    if not compact:
        return tx_receipt

    return without_keys(tx_receipt, self.invalid)
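# `without_keys` (used for the compact return above) is defined elsewhere in
# the project; a minimal sketch, assuming it only drops the receipt fields
# named in `self.invalid`:
def without_keys(d, keys):
    """Return a plain-dict copy of `d` without the entries named in `keys`."""
    return {k: v for k, v in dict(d).items() if k not in keys}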
def run(self) -> bool:
    self.start_time = time.time()
    if cfg.IS_THREADING_ENABLED:
        self.thread_log_setup()

    run_ipfs_daemon()
    log(f"{br(get_date())} Job's source code has been sent through ", "bold cyan", end="")
    if self.cloudStorageID[0] == StorageID.IPFS:
        log("[bold green]IPFS")
    else:
        log("[bold green]IPFS_GPG")

    if not is_ipfs_on():
        return False

    log(f"==> is_hash_locally_cached={cfg.ipfs.is_hash_locally_cached(self.job_key)}")
    if not os.path.isdir(self.results_folder):
        os.makedirs(self.results_folder)

    _remove(f"{self.results_folder}/{self.job_key}")
    try:
        self.check_ipfs(self.job_key)
    except Exception:
        return False

    self.registered_data_hashes = []
    for idx, source_code_hash in enumerate(self.code_hashes):
        if self.cloudStorageID[idx] == StorageID.NONE:
            self.registered_data_hashes.append(source_code_hash)  # GOTCHA
        else:
            ipfs_hash = bytes32_to_ipfs(source_code_hash)
            if ipfs_hash not in self.ipfs_hashes:
                try:  # job_key may already be added to the list as a data hash
                    self.check_ipfs(ipfs_hash)
                except Exception:
                    return False

    initial_folder_size = calculate_size(self.results_folder)
    for idx, ipfs_hash in enumerate(self.ipfs_hashes):
        # at this point the script knows that the provided IPFS hashes exist online
        is_hashed = False
        log(f"## attempting to get IPFS file: {ipfs_hash} ... ", end="")
        if cfg.ipfs.is_hash_locally_cached(ipfs_hash):
            is_hashed = True
            log(ok("already cached"))
        else:
            log()

        if idx == 0:
            target = self.results_folder
        else:
            #: "_" is prepended to the filename for the `ipfs get <ipfs_hash>` case
            target = f"{self.results_data_folder}/_{ipfs_hash}"
            mkdir(target)

        is_storage_paid = False  # TODO: should be set beforehand via user input
        cfg.ipfs.get(ipfs_hash, target, is_storage_paid)
        if idx > 0:
            # https://stackoverflow.com/a/31814223/2402577
            dst_filename = os.path.join(self.results_data_folder, os.path.basename(ipfs_hash))
            if os.path.exists(dst_filename):
                _remove(dst_filename)

            shutil.move(target, dst_filename)
            target = dst_filename

        if self.cloudStorageID[idx] == StorageID.IPFS_GPG:
            cfg.ipfs.decrypt_using_gpg(f"{target}/{ipfs_hash}", target)

        _git.initialize_check(target)
        if not is_hashed:
            folder_size = calculate_size(self.results_folder)
            self.data_transfer_in_to_download_mb += folder_size - initial_folder_size
            initial_folder_size = folder_size

        if idx == 0 and not self.check_run_sh():
            self.complete_refund()
            return False

    log(
        f"==> data_transfer_in={self.data_transfer_in_to_download_mb} MB | "
        f"rounded={int(self.data_transfer_in_to_download_mb)} MB"
    )
    return self.sbatch_call()
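# `bytes32_to_ipfs` above maps an on-chain bytes32 digest back to a CIDv0
# string; a minimal sketch, assuming the stored value is the raw sha2-256
# payload of the multihash and that the `base58` package is available:
import base58

def bytes32_to_ipfs_sketch(digest: bytes) -> str:
    """Prepend the sha2-256 multihash header (0x12, 0x20) and base58-encode."""
    return base58.b58encode(b"\x12\x20" + digest).decode("utf-8")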
def start_call(job_key, index, slurm_job_id):
    """Run when the slurm job launches.

    * cmd1: scontrol show job slurm_job_id | \
            grep 'StartTime' | grep -o -P '(?<=StartTime=).*(?= E)'

    * cmd2: date -d 2018-09-09T18:38:29 +"%s"
    """
    Ebb = cfg.Ebb
    _log.ll.LOG_FILENAME = env.LOG_PATH / "transactions" / env.PROVIDER_ID / f"{job_key}_{index}.txt"
    _log.ll.IS_PRINT = False
    log(f"~/ebloc-broker/broker/start_code.py {job_key} {index} {slurm_job_id}")
    job_id = 0  # TODO: should be obtained from the user's input
    _, _, error = popen_communicate(["scontrol", "show", "job", slurm_job_id])
    if "slurm_load_jobs error: Invalid job id specified" in str(error):
        log(f"E: {error}")
        sys.exit(1)

    # replicate cmd1 above as a subprocess pipeline
    p1 = Popen(["scontrol", "show", "job", slurm_job_id], stdout=PIPE)
    p2 = Popen(["grep", "StartTime"], stdin=p1.stdout, stdout=PIPE)
    p1.stdout.close()
    p3 = Popen(
        ["grep", "-o", "-P", "(?<=StartTime=).*(?= E)"],
        stdin=p2.stdout,
        stdout=PIPE,
    )
    p2.stdout.close()
    date = p3.communicate()[0].decode("utf-8").strip()
    # replicate cmd2 above to convert the timestamp into Unix epoch seconds
    start_time = check_output(["date", "-d", date, "+'%s'"]).strip().decode("utf-8").strip("'")
    log(
        f"{env.EBLOCPATH}/broker/eblocbroker_scripts/set_job_status_running.py "
        f"{job_key} {index} {job_id} {start_time}"
    )
    for attempt in range(10):
        if attempt > 0:
            log(f"warning: sleeping for {cfg.BLOCK_DURATION * 2} seconds ...")
            time.sleep(cfg.BLOCK_DURATION * 2)

        try:
            tx = Ebb.set_job_status_running(job_key, index, job_id, start_time)
            tx_hash = Ebb.tx_id(tx)
            log(f"tx_hash={tx_hash}", "bold")
            d = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log(f"==> set_job_status_running_started {start_time} | attempt_date={d}")
            log("## mongo.set_job_status_running_tx ", end="")
            if Ebb.mongo_broker.set_job_status_running_tx(str(job_key), int(index), str(tx_hash)):
                log(ok())
            else:
                log(br("FAILED"))

            return
        except Exception as e:
            log(f"## attempt={attempt}: {e}")
            if (
                "Execution reverted" in str(e)
                or "Transaction with the same hash was already imported" in str(e)
                or "If you wish to broadcast, include `allow_revert:True`" in str(e)
            ):
                log(f"warning: {e}")
                sys.exit(1)

    log("E: start_code.py failed at all the attempts, abort")
    sys.exit(1)
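# A pure-Python alternative to the external `date -d ... +"%s"` call above,
# assuming the scontrol StartTime is ISO-8601 (e.g. "2018-09-09T18:38:29");
# like `date -d`, a naive datetime is interpreted in the local timezone:
from datetime import datetime

def iso_to_epoch(iso_ts: str) -> str:
    """Convert an ISO-8601 local timestamp into a Unix-epoch string."""
    return str(int(datetime.fromisoformat(iso_ts).timestamp()))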
def run_driver(given_bn):
    """Run the main driver script for eblocbroker in the background."""
    # dummy sudo command to obtain the password when the session starts; it is
    # needed only to create users and to submit the slurm job under another user
    run(["sudo", "printf", "hello"])
    kill_process_by_name("gpg-agent")
    config.logging = setup_logger(_log.DRIVER_LOG)
    try:
        from broker.imports import connect

        connect()
        Ebb: "Contract.Contract" = cfg.Ebb
        driver = Driver()
    except Exception as e:
        raise Terminate from e

    if not env.PROVIDER_ID:
        raise Terminate(f"PROVIDER_ID is None in {env.LOG_PATH}/.env")

    if not env.WHOAMI or not env.EBLOCPATH or not env.PROVIDER_ID:
        raise Terminate(f"Please run: {env.BASH_SCRIPTS_PATH}/folder_setup.sh")

    if not env.SLURMUSER:
        raise Terminate(f"SLURMUSER is not set in {env.LOG_PATH}/.env")

    deployed_block_number = Ebb.get_deployed_block_number()
    if not env.config["block_continue"]:
        env.config["block_continue"] = deployed_block_number

    if given_bn > 0:
        block_number_saved = int(given_bn)
    else:
        block_number_saved = env.config["block_continue"]
        if not isinstance(env.config["block_continue"], int):
            log("E: block_continue variable is empty or contains an invalid character")
            if not question_yes_no("#> Would you like to read from the contract's deployed block number?"):
                terminate()

            block_number_saved = deployed_block_number
            if deployed_block_number:
                env.config["block_continue"] = deployed_block_number
            else:
                raise Terminate(f"deployed_block_number={deployed_block_number} is invalid")

    _tools(block_number_saved)
    try:
        Ebb.is_contract_exists()
    except Exception:
        terminate(
            "Contract address does not exist on the blockchain, is the blockchain synced?\n"
            f"block_number={Ebb.get_block_number()}",
            is_traceback=False,
        )

    if cfg.IS_THREADING_ENABLED:
        log(f"## is_threading={cfg.IS_THREADING_ENABLED}")

    Ebb.is_eth_account_locked(env.PROVIDER_ID)
    log(f"==> whoami={env.WHOAMI}")
    log(f"==> log_file={_log.DRIVER_LOG}")
    log(f"==> rootdir={os.getcwd()}")
    log(f"==> is_web3_connected={Ebb.is_web3_connected()}")
    if not Ebb.does_provider_exist(env.PROVIDER_ID):
        # updated since the provider is not registered
        env.config["block_continue"] = Ebb.get_block_number()
        terminate(
            textwrap.fill(
                f"Your Ethereum address {env.PROVIDER_ID} "
                "does not match any provider in eBlocBroker. Please register your "
                "provider using your Ethereum address in eBlocBroker. You can use "
                "the eblocbroker/register_provider.py script to register your provider."
            ),
            is_traceback=False,
        )

    if not Ebb.is_orcid_verified(env.PROVIDER_ID):
        raise QuietExit(f"provider's ({env.PROVIDER_ID}) ORCID is not verified")

    blk_read = block_number_saved
    balance_temp = Ebb.get_balance(env.PROVIDER_ID)
    eth_balance = Ebb.eth_balance(env.PROVIDER_ID)
    log(f"==> deployed_block_number={deployed_block_number}")
    log(f"==> account_balance={eth_balance} gwei | {cfg.w3.fromWei(eth_balance, 'ether')} eth")
    log(f"==> Ebb_balance={balance_temp}")
    while True:
        wait_until_idle_core_available()
        time.sleep(0.2)
        if not str(blk_read).isdigit():
            raise Terminate(f"block_read_from={blk_read}")

        balance = Ebb.get_balance(env.PROVIDER_ID)
        if cfg.IS_THREADING_ENABLED:
            _squeue()

        console_ruler()
        if isinstance(balance, int):
            value = int(balance) - int(balance_temp)
            if value > 0:
                log(f"==> since the Driver started provider_gained_wei={value}")

        current_bn = Ebb.get_block_number()
        log(f" * {get_date()} waiting for a new job to come in since block_number={blk_read}")
        log(f"==> current_block={current_bn} | sync_from={blk_read}")
        flag = True
        while current_bn < int(blk_read):
            current_bn = Ebb.get_block_number()
            if flag:
                log(f"## waiting for the block number to be updated; it remains constant at {current_bn}")

            flag = False
            time.sleep(2)

        log(f"#> [bold yellow]Passed the incremented block number... watching from block_number=[cyan]{blk_read}")
        blk_read = str(blk_read)  # the events' read block number has been updated
        slurm.pending_jobs_check()
        try:
            driver.logged_jobs_to_process = Ebb.run_log_job(blk_read, env.PROVIDER_ID)
            driver.process_logged_jobs()
            if len(driver.logged_jobs_to_process) > 0 and driver.latest_block_number > 0:
                # update the latest read block number
                blk_read = driver.latest_block_number + 1
                env.config["block_continue"] = blk_read

            if not driver.is_provider_received_job:
                blk_read = env.config["block_continue"] = current_bn
        except Exception as e:
            log()
            log(f"E: {e}")
            if "Filter not found" in str(e) or "Read timed out" in str(e):
                # e.g. HTTPSConnectionPool(host='core.bloxberg.org', port=443): Read timed out. (read timeout=10)
                log("## sleeping for 60 seconds...", end="")
                time.sleep(60)
                log(ok())
            else:
                print_tb(e)
def submit(provider, _from, job):
    job.check_account_status(_from)
    job.Ebb.is_provider_valid(provider)
    job.Ebb.is_requester_valid(_from)
    try:
        provider_info = job.Ebb.get_provider_info(provider)
        log(f"==> Provider's available_core_num={provider_info['available_core_num']}")
        log(f"==> Provider's price_core_min={provider_info['price_core_min']}")
    except Exception as e:
        raise QuietExit from e

    provider = job.Ebb.w3.toChecksumAddress(provider)
    provider_to_share = provider_info["email"]
    data_files_json_path = f"{job.tmp_dir}/meta_data.json"
    try:
        if len(job.folders_to_share) > 1:
            for folder_to_share in job.folders_to_share[1:]:
                if not isinstance(folder_to_share, bytes):
                    # starting from the first element, ignoring the source
                    # folder; attempting to share the data folder
                    job_key, tar_hash, job.tar_hashes = share_folder(
                        folder_to_share, provider_to_share, job.tmp_dir
                    )
                    job.foldername_tar_hash[folder_to_share] = tar_hash
                    job.keys[tar_hash] = job_key

            if job.tmp_dir == "":
                print_tb("job.tmp_dir is empty")
                sys.exit()

            _dump_dict_to_file(data_files_json_path, job.keys)
            data_json = read_json(data_files_json_path)
            if data_json:
                log("## meta_data:")
                log(data_json)

            with suppress(Exception):
                data_json = read_json(data_files_json_path)
                if job.keys == data_json:
                    log(f"## meta_data.json file matches with the given data keys {ok()}")
                else:
                    log("warning: meta_data.json file does not match with the given data keys")

        folder_to_share = job.folders_to_share[0]
        if not isinstance(folder_to_share, bytes):
            job_key, tar_hash, job.tar_hashes = share_folder(
                folder_to_share, provider_to_share, job.tmp_dir, job_key_flag=True
            )
            job.foldername_tar_hash[folder_to_share] = tar_hash
            # add an element to the beginning of the fresh dict; since Python
            # 3.7, dictionaries are ordered by insertion order
            job.keys_final[tar_hash] = job_key
            job.keys_final.update(job.keys)
            job.keys = job.keys_final

        return job
    except Exception as e:
        print_tb(e)
        raise e
    finally:
        _dump_dict_to_file(data_files_json_path, job.keys)
        data_json = read_json(data_files_json_path)
        if data_json:
            log("## meta_data:")
            log(data_json)
            _id = None
            for (*_, v) in data_json.items():
                _id = v
                break

            if _id:
                log("## updating meta_data ", end="")
                update_meta_data_gdrive(_id, data_files_json_path)
                log(ok())
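# `_dump_dict_to_file` and `read_json`, used above, are helpers defined
# elsewhere in the project; minimal sketches, assuming plain JSON
# (de)serialization is all they do:
import json

def _dump_dict_to_file(path, d) -> None:
    """Serialize `d` into `path` as JSON."""
    with open(path, "w") as f:
        json.dump(d, f)

def read_json(path):
    """Load and return the JSON content of `path` (raises if missing or invalid)."""
    with open(path) as f:
        return json.load(f)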