def read_chars(self):
    """Count characters for every drama that still needs it and store them.

    Dramas whose ``kanji_ok`` flag equals 1 are skipped (a message is
    printed for each).  The remaining dramas are handed to
    ``self.read_chars_worker`` on a four-thread pool, one batch at a time;
    each worker result is passed to ``self.db.push_chars_count``.  Once all
    batches have been handled ``self.db.push_chars()`` is called.

    An ``Error`` raised while a batch is processed is reported through
    ``exception`` and the remaining results of that batch are dropped;
    later batches are still processed.
    """
    # The original loop stopped submitting once more than 10 futures were
    # queued, i.e. 11 jobs per batch.
    batch_size = 11

    # Filter in a single pass instead of removing items from the list that
    # is being iterated (which silently skipped every other element and
    # emptied the list returned by the database layer).
    pending = []
    for drama in self.db.get_all_dramas():
        if drama.kanji_ok == 1:
            print("kanji_ok TRUE -> {} skipped".format(drama.uid))
            continue
        pending.append(drama)

    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        for start in range(0, len(pending), batch_size):
            batch = pending[start:start + batch_size]
            try:
                futures = [
                    executor.submit(self.read_chars_worker, drama)
                    for drama in batch
                ]
                for future in concurrent.futures.as_completed(futures):
                    self.db.push_chars_count(future.result())
            except Error as e:
                exception(e)
    self.db.push_chars()
def extract_everything():
    """Extract everything in components.yml, respecting order requirements.

    Components are sorted so that those with the longest chain of dependants
    (and, in ties, the largest files) are submitted first.  A component is
    only submitted once the component named by its ``install_after`` is no
    longer waiting in the queue and its extraction job, if any, has finished.
    Components whose extraction raised are removed from ``component.ALL``
    and from the FILES / GRAPHICS / UTILITIES lists, and reported on stdout.

    Raises:
        ValueError: if the ``install_after`` configuration is cyclic or can
            otherwise never be satisfied.
    """
    # Maps a parent name to the name of a component installed after it.
    # Built once here rather than on every sort-key call.
    after = {c.install_after: c.name for c in component.ALL.values()}

    def q_key(comp):
        """Decide extract priority by pointer-chase depth, filesize in ties."""
        name, seen = comp.name, []
        while name in after:
            seen.append(name)
            name = after.get(name)
            if name in seen:
                raise ValueError('Cyclic "install_after" config detected: '
                                 + " -> ".join(seen + [name]))
        return len(seen), os.path.getsize(comp.path)

    queue = list(component.ALL.values()) + [
        component.ALL["Dwarf Fortress"]._replace(name=path, extract_to=path)
        for path in ("curr_baseline", "graphics/ASCII")
    ]
    queue.sort(key=q_key, reverse=True)

    # Components with ``extract_to is False`` are never extracted here
    # (for Therapist, handled in build.py).  Drop them up front instead of
    # popping them from the list while it is being enumerated.
    for comp in queue:
        if comp.extract_to is False:
            assert comp.filename.endswith(".ini")
    queue = [comp for comp in queue if comp.extract_to is not False]

    with concurrent.futures.ProcessPoolExecutor(8) as pool:
        futures = {}
        while queue:
            while sum(f.running() for f in futures.values()) < 8:
                # Sampled before the scan: if every submitted job was already
                # finished and the scan still finds nothing, nothing can ever
                # become eligible.
                idle = all(f.done() for f in futures.values())
                waiting = {comp.name for comp in queue}
                for idx, comp in enumerate(queue):
                    parent = comp.install_after
                    if parent in waiting:
                        # Parent has not even been submitted yet; a plain
                        # ``futures.get(parent) is None`` check would wrongly
                        # treat this as "no parent".
                        continue
                    aft = futures.get(parent)
                    # Even if it's highest-priority, wait for parent job(s)
                    if aft is None or aft.done():
                        futures[comp.name] = extract_comp(pool, queue.pop(idx))
                        break  # reset index or we might pop the wrong item
                else:
                    if idle:
                        raise ValueError(
                            'Unsatisfiable "install_after" config for: '
                            + ", ".join(comp.name for comp in queue))
                    break  # if there was nothing eligible to extract, sleep
            time.sleep(0.01)

    # Leaving the ``with`` block waits for all jobs, so exception() returns
    # immediately here.
    failed = [k for k, v in futures.items() if v.exception() is not None]
    for key in failed:
        comp = component.ALL.pop(key, None)
        for lst in (component.FILES, component.GRAPHICS, component.UTILITIES):
            if comp in lst:
                lst.remove(comp)
    if failed:
        print("ERROR: Could not extract: " + ", ".join(failed))
def identify_decision_models_parallel(
    model: ForSyDeModel,
    rules: List[IdentificationRuleType] = _get_registered_rules(),
    concurrent_idents: int = os.cpu_count() or 1,
) -> List[DecisionModel]:
    """Run the Design Space Identification scheme with a process pool.

    This implements the identification procedure presented in the paper
    [DSI-DATE'2021]: every still-active rule is applied to the input model
    together with the decision models identified so far, and the procedure
    repeats until every rule has reported a fixpoint or the iteration cap
    (``len(model) * len(rules)``) is reached.  Within one round all active
    rules are submitted to the pool at once.

    Arguments:
        model: the input model handed to every rule.
        rules: the identification rules to apply; when not passed, the
            rules returned by ``_get_registered_rules()`` are used.
        concurrent_idents: number of worker processes.

    Returns:
        The decision models identified by the rules, in the order found.
    """
    iteration_cap = len(model) * len(rules)
    pending_rules = list(rules)
    identified: List[DecisionModel] = []
    rounds = 0
    pool = concurrent.futures.ProcessPoolExecutor(max_workers=concurrent_idents)
    with pool as executor:
        while pending_rules and rounds < iteration_cap:
            # launch one trial per active rule, remembering which rule
            # produced which trial
            trials = {}
            for rule in pending_rules:
                trials[rule] = executor.submit(rule, model, identified)
            concurrent.futures.wait(trials.values())
            for rule, trial in trials.items():
                fixed, subprob = trial.result()
                # merge the newly identified sub-problem, if any
                if subprob:
                    identified.append(subprob)
                # a rule at its fixpoint has nothing more to contribute
                if fixed:
                    pending_rules.remove(rule)
            rounds += 1
    return identified
def read_lines(self):
    """Read the lines of every subfolder and store them with unique ids.

    The subfolders of ``self.args["path"]`` are handed to
    ``self.line_ref_worker`` on a four-thread pool, one batch at a time.
    Every line returned by a worker gets a consecutive ``uid`` (starting at
    0, in completion order) before the worker's lines are passed to
    ``self.db.push_lines``.

    An ``Error`` raised during processing is reported through ``exception``
    and stops the remaining work.
    """
    # The original loop stopped submitting once more than 15 futures were
    # queued, i.e. 16 jobs per batch.
    batch_size = 16
    line_id = 0
    # Work on a copy and slice it into batches instead of removing items
    # from the list while iterating it, which skipped every other subfolder
    # on each pass and shrank later batches to one or two jobs.
    subfolders = list(DccUtils.get_subfolders(self.args["path"]))
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        try:
            for start in range(0, len(subfolders), batch_size):
                batch = subfolders[start:start + batch_size]
                futures = [
                    executor.submit(self.line_ref_worker, subfolder)
                    for subfolder in batch
                ]
                for future in concurrent.futures.as_completed(futures):
                    lines = future.result()
                    for line in lines:
                        line.uid = line_id
                        line_id += 1
                    self.db.push_lines(lines)
        except Error as e:
            exception(e)
def extract_everything():
    """Extract everything in components.yml, respecting order requirements.

    Components are sorted so that those with the longest chain of dependants
    (and, in ties, the largest files) are submitted first.  A component with
    an ``install_after`` value is only submitted once the job for that
    component has been submitted and has finished.

    Raises:
        ValueError: if the ``install_after`` configuration is cyclic.
    """
    # Maps a parent name to the name of a component installed after it.
    # Built once here rather than on every sort-key call.
    after = {c.install_after: c.name for c in component.ALL.values()}

    def q_key(comp):
        """Decide extract priority by pointer-chase depth, filesize in ties."""
        name, seen = comp.name, []
        while name in after:
            seen.append(name)
            name = after[name]
            if name in seen:
                # Without this check a cyclic config makes the chase loop
                # run forever.
                raise ValueError('Cyclic "install_after" config detected: '
                                 + ' -> '.join(seen + [name]))
        return len(seen), os.path.getsize(comp.path)

    queue = list(component.ALL.values()) + [
        component.ALL['Dwarf Fortress']._replace(name=path, extract_to=path)
        for path in ('curr_baseline', 'graphics/ASCII')]
    queue.sort(key=q_key, reverse=True)
    with concurrent.futures.ProcessPoolExecutor(8) as pool:
        futures = dict()
        while queue:
            while sum(f.running() for f in futures.values()) < 8:
                for idx, comp in enumerate(queue):
                    aft = comp.install_after
                    if not aft or (aft in futures and futures[aft].done()):
                        futures[comp.name] = extract_comp(pool, queue.pop(idx))
                        # Restart the scan: the list just changed under the
                        # enumerate, and the running-jobs limit has to be
                        # re-checked before submitting more.
                        break
                else:
                    # Nothing eligible right now (or the queue is empty):
                    # leave the submit loop so we sleep instead of spinning.
                    break
            time.sleep(0.01)
def _cancel_all_rpcs(futures: Mapping[int, grpc.Future]) -> None:
    """Log a notice, then call ``cancel()`` on every future in *futures*."""
    logger.info("Cancelling all remaining RPCs")
    for _, rpc_future in futures.items():
        rpc_future.cancel()
def execute_together(
    cls,
    remotes,  # type: typing.Iterable[SSHClientBase]
    command,  # type: str
    timeout=constants.DEFAULT_TIMEOUT,  # type: typing.Union[int, None]
    expected=None,  # type: typing.Optional[typing.Iterable[int]]
    raise_on_err=True,  # type: bool
    **kwargs
):  # type: (...) -> typing.Dict[typing.Tuple[str, int], exec_result.ExecResult]
    """Execute command on multiple remotes in async mode.

    :param remotes: Connections to execute on
    :type remotes: typing.Iterable[SSHClientBase]
    :param command: Command for execution
    :type command: str
    :param timeout: Timeout for command execution.
    :type timeout: typing.Union[int, None]
    :param expected: expected return codes (0 by default)
    :type expected: typing.Optional[typing.Iterable[int]]
    :param raise_on_err: Raise exception on unexpected return code
    :type raise_on_err: bool
    :return: dictionary {(hostname, port): result}
    :rtype: typing.Dict[typing.Tuple[str, int], exec_result.ExecResult]
    :raises ParallelCallProcessError: Unexpected return code on at least one target
    :raises ParallelCallExceptions: At least one exception raised during execution (including timeout)

    .. versionchanged:: 1.2.0 default timeout 1 hour
    .. versionchanged:: 1.2.0 log_mask_re regex rule for masking cmd
    """
    @threaded.threadpooled
    def get_result(remote):  # type: (SSHClientBase) -> exec_result.ExecResult
        """Get result from remote call.

        The remote is passed in explicitly: the pooled call runs later on a
        worker thread, so reading the enclosing loop variable would pick up
        whatever remote the caller's loops have moved on to by then.
        """
        (
            chan,
            _,
            stderr,
            stdout,
        ) = remote.execute_async(command, **kwargs)  # type: _type_execute_async

        chan.status_event.wait(timeout)
        exit_code = chan.recv_exit_status()

        # pylint: disable=protected-access
        cmd_for_log = remote._mask_command(
            cmd=command,
            log_mask_re=kwargs.get('log_mask_re', None)
        )
        # pylint: enable=protected-access

        result = exec_result.ExecResult(cmd=cmd_for_log)
        result.read_stdout(src=stdout)
        result.read_stderr(src=stderr)
        result.exit_code = exit_code

        chan.close()
        return result

    expected = expected or [proc_enums.ExitCodes.EX_OK]
    expected = proc_enums.exit_codes_to_enums(expected)

    futures = {}
    results = {}
    errors = {}
    raised_exceptions = {}

    for remote in set(remotes):  # Use distinct remotes
        futures[remote] = get_result(remote)

    (
        _,
        not_done,
    ) = concurrent.futures.wait(
        list(futures.values()),
        timeout=timeout
    )  # type: typing.Set[concurrent.futures.Future], typing.Set[concurrent.futures.Future]

    for future in not_done:  # pragma: no cover
        future.cancel()

    for (
        remote,
        future,
    ) in futures.items():  # type: SSHClientBase, concurrent.futures.Future
        try:
            result = future.result()
            results[(remote.hostname, remote.port)] = result
            if result.exit_code not in expected:
                errors[(remote.hostname, remote.port)] = result
        except Exception as e:
            raised_exceptions[(remote.hostname, remote.port)] = e

    if raised_exceptions:  # always raise
        raise exceptions.ParallelCallExceptions(
            command,
            raised_exceptions,
            errors,
            results,
            expected=expected
        )
    if errors and raise_on_err:
        raise exceptions.ParallelCallProcessError(
            command,
            errors,
            results,
            expected=expected
        )
    return results
raise RuntimeError( "Invalid AWS Link. Please get a new snapchat memories.json\n") else: returnDict["link"] = r.text returnDict["type"] = dataType return returnDict links = [] with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: futures = {} for date in data: futures[date] = executor.submit(geturl, data[date]["link"], date, data[date]["type"]) for future in concurrent.futures.as_completed(futures.values()): try: result = future.result() links.append(result) """ print(result) """ except Exception: print( f"Exception when processing result, most likely failure in AWS Link, please make sure you are using the newest snapchat memories link {futures[future]}" ) """ print(links) """ def set_default(obj): if isinstance(obj, set): return list(obj) raise TypeError
def run_pipeline(file_list, intermediate_local, target_bucket, target_dir,
                 n_parts, overwrite):
    """Download each file in parts, reassemble it and upload it to S3.

    For every entry of ``file_list`` the remote size is looked up with
    ``get_url_size``; when ``overwrite`` is ``False`` entries for which
    ``check_file_on_s3`` returns ``True`` are skipped.  Each remaining file
    is fetched as ``n_parts`` ranged downloads running on a thread pool,
    concatenated with ``concatenate_parts``, uploaded to
    ``target_bucket`` under ``target_dir`` and then removed locally.

    Args:
        file_list: names of the files to transfer.
        intermediate_local: local directory used for the downloaded parts;
            created if missing.
        target_bucket: destination S3 bucket.
        target_dir: key prefix inside the bucket.
        n_parts: number of ranged downloads per file.
        overwrite: when ``False``, files already present on S3 are skipped.

    Raises:
        Exception: whatever a part download raised; the failure is surfaced
            before the parts are concatenated.
    """
    # AWS
    sts = boto3.client('sts')
    sts.get_caller_identity()  # check credentials
    s3 = boto3.client('s3')
    transfer_config = TransferConfig(use_threads=True)
    logger.info("Starting download of %s files", len(file_list))

    # Calculate file size
    # Skip existing files if specified
    total_size = 0
    file_size_dict = dict()
    logger.info("Calculating total file size...")
    for file_name in file_list:
        s3_key = os.path.join(target_dir, file_name)
        if overwrite is False:
            if check_file_on_s3(s3_client=s3, bucket=target_bucket,
                                key=s3_key) is True:
                logger.info(
                    "Skipping: %s because `overwrite` = False was specified",
                    file_name)
                continue
        size_check = get_url_size(file_name)
        file_size_dict[file_name] = size_check
        total_size += size_check

    n_files = len(file_size_dict)
    logger.info("Number of files to be downloaded: %s", n_files)
    if n_files == 0:
        logger.info("No files to process. Ending execution.")
        return
    logger.info("Total file size is: %s MB", round(total_size/1024/1024, 1))
    logger.info("Number of parts to be created: %s", n_files * n_parts)

    # Download -> concatenate (if needed) -> upload -> remove
    Path(intermediate_local).mkdir(parents=True, exist_ok=True)
    for f in file_size_dict:
        size = file_size_dict[f]
        start = 0
        part_size = int(size / n_parts)
        logger.info("%s: file size ~ %s MB, part size ~ %s MB",
                    f, round(size/1024/1024, 1),
                    round((part_size)/1024/1024, 1))
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = {}
            for i in range(1, n_parts+1):
                end = min(start + part_size, size)
                part_file_name = f"{f.split('.7z')[0]}_part{i}of{n_parts}.7z"
                part_download = executor.submit(
                    download_file,
                    source_name=f,
                    destination_name=part_file_name,
                    destination_dir=intermediate_local,
                    headers={"Range": f"bytes={start}-{end}"})
                futures[part_download] = part_file_name
                start = end + 1

        # The pool has shut down, so every download has finished.  Ask each
        # future for its result so that a failed part raises here instead of
        # being silently concatenated and uploaded as a corrupt archive.
        for part_download in futures:
            part_download.result()

        merged_path = concatenate_parts(file_name=f,
                                        directory=intermediate_local,
                                        parts_list=list(futures.values()),
                                        remove=True)
        upload_file(s3_client=s3, path=merged_path, bucket=target_bucket,
                    key=os.path.join(target_dir, f), Config=transfer_config)
        remove_file(merged_path)