def playlist_dl(playlist_id):
    data = get_playlist(playlist_id)
    print('Start Downloading {0}.'.format(data['name']))
    path = create_dir(data)
    write_info_json(path, data)
    songs = get_songs_info(path, data)
    global ID_WIDTH
    ID_WIDTH = len(str(len(songs)))
    print('Downloading songs...')
    tqdm.get_lock()
    with tqdm(total=len(songs), ncols=70) as pbar:
        pool = ThreadPoolExecutor(max_workers=MAX_POOL)
        tasks = [pool.submit(download_song, path, i + 1, song)
                 for i, song in enumerate(songs)]
        for _ in as_completed(tasks):
            pbar.update()

def outer_function(**config):
    """Outer function running inner function for each task in input dict"""
    freeze_support()  # for Windows support
    tqdm.set_lock(RLock())
    with concurrent.futures.ThreadPoolExecutor(
            initializer=tqdm.set_lock,
            initargs=(tqdm.get_lock(),),
            max_workers=3) as executor:
        results_list = []
        outer_loop_kwarg = {
            'total': len(config['package']['tasks']),
            'desc': 'Outer',
            'ascii': True,
            'position': len(config['package']['tasks']),
            'leave': True
        }
        with tqdm(**outer_loop_kwarg) as out_progress:
            futuresListComp = [
                executor.submit(inner_function, **node)
                for node in config['package']['tasks']
            ]
            # Update after each completed task
            for future in concurrent.futures.as_completed(futuresListComp):
                out_progress.update()
                results_list.append(future.result())
    return results_list

def main():
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument("--datapath", type=str, default="data")
    parser.add_argument("--data_type", type=str, default="train")
    parser.add_argument("--pilot_version", type=int, choices=[1, 2], default=1)
    parser.add_argument("--processes", type=int, default=4)
    parser.add_argument("--data_nums", type=int, default=64)
    parser.add_argument("--seed", type=int, default=43)
    parser.add_argument("--mode", type=int, choices=[0, 1, 2], default=None)
    parser.add_argument("--SNRdb", type=float, default=None)
    parser.add_argument("--with_pure_y", action='store_true')
    parser.add_argument("--debug", action='store_true')
    args = parser.parse_args()

    H, Htest = read_data(args.datapath)
    using_H = H if args.data_type == "train" else Htest
    generate_data_fix = partial(generate_data, args=args, H=using_H)

    tqdm.set_lock(RLock())
    with Pool(processes=args.processes, initializer=tqdm.set_lock,
              initargs=(tqdm.get_lock(),)) as pool:
        [
            pool.map(generate_data_fix,
                     range(args.processes * i, args.processes * (i + 1)))
            for i in range(args.data_nums // args.processes)
        ]

def _do_epoch(self, data_iter, is_train, batches_count, name=None):
    self.on_epoch_begin(is_train, name, batches_count=batches_count)

    progress_bar_class = ConsoleProgressBar
    if self._use_tqdm:
        try:
            from tqdm import tqdm
            tqdm.get_lock().locks = []
            progress_bar_class = tqdm
        except:
            pass

    with torch.autograd.set_grad_enabled(is_train):
        with progress_bar_class(total=batches_count) as progress_bar:
            try:
                for _ in range(batches_count):
                    batch = next(data_iter)
                    batch_progress = self.on_batch(batch)
                    progress_bar.update()
                    progress_bar.set_description(batch_progress)
            except StopIteration:
                pass
            epoch_progress = self.on_epoch_end()
            progress_bar.set_description(epoch_progress)
            progress_bar.refresh()

def scrape():
    global filename
    start_time = datetime.now()

    # Building the filename
    filename = str(filename).replace("$DATE", start_time.strftime("%Y%m%d%H%M%S"))
    search = str(args.search).replace(" ", "")
    if len(search) > 10:
        search = search[0:9]
    filename = str(filename).replace("$SEARCH", search)

    func_args = []
    stats_dict = {}
    if args.engines and len(args.engines) > 0:
        eng = args.engines[0]
        for e in eng:
            try:
                if not (args.exclude and len(args.exclude) > 0 and e in args.exclude[0]):
                    func_args.append("{}:{}".format(e, args.search))
                    stats_dict[e] = 0
            except KeyError:
                print("Error: search engine {} not in the list of supported engines".format(e))
    else:
        for e in supported_engines.keys():
            if not (args.exclude and len(args.exclude) > 0 and e in args.exclude[0]):
                func_args.append("{}:{}".format(e, args.search))
                stats_dict[e] = 0

    # Doing multiprocessing
    units = min((cpu_count() - 1), len(func_args))
    if args.mp_units and args.mp_units > 0:
        units = min(args.mp_units, len(func_args))
    print("search.py started with {} processing units...".format(units))
    freeze_support()

    results = {}
    with Pool(units, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as p:
        results_map = p.map(run_method, func_args)
        results = reduce(lambda a, b: a + b if b is not None else a, results_map)

    stop_time = datetime.now()

    if not args.continuous_write:
        with open(filename, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=field_delim, quoting=csv.QUOTE_ALL)
            for r in results:
                write_to_csv(csv_writer, r)

    total = 0
    print("\nReport:")
    print(" Execution time: %s seconds" % (stop_time - start_time))
    print(" Results per engine:")
    for r in results:
        stats_dict[r['engine']] += 1
    for s in stats_dict:
        n = stats_dict[s]
        print(" {}: {}".format(s, str(n)))
        total += n
    print(" Total: {} links written to {}".format(str(total), filename))

def proc_2():
    cp = CustomPool()
    with cp.Pool(n_process=4, initializer=tqdm.set_lock,
                 initargs=(tqdm.get_lock(),)) as p:
        for result in tqdm(p.imap_unordered(progresser_2, range(10)), total=10):
            pass
    cp.update()

def main(args):
    with open(args.vectors_file, mode="r") as fp:
        vectors_file = json.load(fp)
    vectors = vectors_file["vectors"]
    vectors_dir = os.path.dirname(args.vectors_file)

    jobs_list = list(combinations(vectors.keys(), 2))
    splitted_jobs = split_jobs(jobs_list, args.threads)
    logging.debug("Jobs count: {}".format(len(jobs_list)))
    logging.debug(" For each worker: {}".format([len(splitted) for splitted in splitted_jobs]))

    vectorbase_path = os.path.normpath(os.path.join(vectors_dir, vectors_file["base"]["path"]))
    if not os.path.isfile(vectorbase_path):
        logging.error("VectorBase file '{}' does not exist".format(vectorbase_path))
        sys.exit(1)

    scores = {}
    freeze_support()  # for Windows support
    with Pool(args.threads, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as pool:
        api_vector = ApiVector(vectorbase_path)
        logging.info("Calculating the scores...")
        results = []
        for i in range(args.threads):
            task_args = (i, args.threads, api_vector, splitted_jobs[i], vectors, args.verbose)
            results.append(pool.apply_async(worker_function, args=task_args))
        pool.close()
        pool.join()
        logging.info(" COMPLETED")
        logging.info("Merging the results... ")
        scores = merge_dictionaries([res.get() for res in results])
        logging.info(" COMPLETED")

    out_dir = os.path.join(os.path.dirname(vectors_dir), "scores")
    base = vectors_file["base"]
    if args.out_file is None:
        args.out_file = os.path.join(
            out_dir,
            "scores_{}_{}_{}.json".format(base["imports_type"], base["weights"], base["size"]))
    if not os.path.exists(os.path.dirname(args.out_file)):
        try:
            os.makedirs(os.path.dirname(args.out_file))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    dataset_path = os.path.normpath(os.path.join(vectors_dir, vectors_file["dataset"]))
    scores_dict = {
        "dataset": os.path.relpath(dataset_path, out_dir),
        "base": vectors_file["base"],
        "scores": scores
    }
    scores_dict["base"]["path"] = os.path.relpath(vectorbase_path, out_dir)
    with open(args.out_file, mode="w") as opf:
        logging.info("Dumping scores to '{}'... ".format(os.path.basename(args.out_file)))
        json.dump(scores_dict, opf, indent=4)
        logging.info(" COMPLETED")

def prog_map(elms, f, desc="Synth", chunksize=1, procs=8, order=True):
    with tqdm(elms, desc=desc) as t:
        with multiprocessing.Pool(procs, initializer=tqdm.set_lock,
                                  initargs=(tqdm.get_lock(),)) as p:
            if order:
                pool = list(p.imap(f, t, chunksize=chunksize))
            else:
                pool = list(p.imap_unordered(f, t, chunksize=chunksize))
            return pool

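# A minimal usage sketch for `prog_map` above (not part of the original source,
# which assumes `tqdm` and `multiprocessing` are already imported alongside it).
# `square` is a hypothetical top-level worker so multiprocessing can pickle it,
# and the call is guarded for spawn-based platforms.
import multiprocessing
from tqdm import tqdm


def square(x):
    return x * x


if __name__ == "__main__":
    # Ordered results of square() over 0..99, with the tqdm lock shared by workers.
    squares = prog_map(range(100), square, desc="Squares", chunksize=4, procs=4)
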
def dfu_flash(dfu, dfu_file, queue, pos=0):
    '''Flash a list of DFU devices with the given file'''
    snum = dfu.get_string(dfu.dev.iSerialNumber)

    # Clear left-over errors
    if dfu.get_status()[1] == kidfu.DfuState.DFU_ERROR:
        dfu.clear_status()

    # Flash
    blocks = [dfu_file.data[i:i + 64] for i in range(0, len(dfu_file.data), 64)]
    with tqdm.get_lock():
        progress = tqdm(total=len(blocks), unit='B', unit_scale=64,
                        miniters=0, desc=colorize(snum, colorama.Fore.CYAN),
                        position=pos, dynamic_ncols=True, leave=True, smoothing=0)

    for bnum, block in enumerate(blocks):
        try:
            dfu.write(bnum, block)
            status = dfu.wait_while_state(kidfu.DfuState.DFU_DOWNLOAD_BUSY)
            if status[1] != kidfu.DfuState.DFU_DOWNLOAD_IDLE:
                queue.put('%s: Error %d' % (snum, status[1]))
                return
        except usb.core.USBError:
            queue.put('%s: USB error' % snum)
            return
        with tqdm.get_lock():
            progress.update(1)

    with tqdm.get_lock():
        progress.refresh()
        progress.close()

    dfu.leave()
    status = dfu.get_status()
    if status[1] == kidfu.DfuState.DFU_MANIFEST_SYNC:
        queue.put('%s: OK' % snum)
        return
    queue.put('%s: Error finish' % snum)

def test_multiprocessing_1():
    hyper = Hyperactive(
        distribution={
            "multiprocessing": {
                "initializer": tqdm.set_lock,
                "initargs": (tqdm.get_lock(),),
            }
        })
    hyper.add_search(objective_function, search_space, n_iter=15, n_jobs=2)
    hyper.run()

def get_songs_info(path, data):
    path_json = os.path.join(path, 'mini_info.json')
    if os.path.exists(path_json):
        with open(path_json) as f:
            return json.load(f)
    songs = []
    print('Getting songs information...')
    tqdm.get_lock()
    with tqdm(total=len(data['tracks']), ncols=70) as pbar:
        pool = ThreadPoolExecutor(max_workers=MAX_POOL)
        tasks = [pool.submit(get_one_song_info, songs, i, x)
                 for i, x in enumerate(data['tracks'])]
        for state in as_completed(tasks):
            if state._result:
                pbar.update()
    songs = sorted(songs, key=lambda x: x[0])
    songs = [x[1] for x in songs]
    with open(path_json, 'w') as f:
        json.dump(songs, f)
    return songs

def parallel_ilr_inference(nb_jobs=50, **kwargs):
    kwargs_list = []
    for n in range(nb_jobs):
        kwargs['seed'] = n
        kwargs_list.append(kwargs.copy())

    with Pool(processes=min(nb_jobs, nb_cores),
              initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as p:
        res = p.map(_job, kwargs_list)

    return res

def spawn_processes(init_fn, read_fn, write_fn, num_readers=1, num_writers=1):
    """Start readers and writers."""
    tqdm.set_lock(RLock())

    write_q = Queue()
    write_fn = partial(write_fn, lock=tqdm.get_lock(), write_q=write_q)
    writers = [
        Process(daemon=True, target=partial(write_fn, pos=i))
        for i in range(num_writers, 0, -1)
    ]

    read_q = Queue()
    read_fn = partial(read_fn, lock=tqdm.get_lock(), read_q=read_q, write_q=write_q)
    readers = [
        Process(daemon=True, target=partial(read_fn, pos=i))
        for i in range(num_readers + 1, num_writers, -1)
    ]

    for p in readers + writers:
        p.start()

    init_fn(read_q)
    notify_and_join(read_q, readers)
    notify_and_join(write_q, writers)

def scp_operation(args):
    credentials = Credentials()
    targets = args.hostname.split(",")
    tqdm.set_lock(RLock())
    with Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as pool:
        pool.starmap(
            scp_process,
            zip(
                targets,
                repeat(credentials),
                repeat(args.filename),
                repeat(args.dst_file_path),
                list(range(len(targets))),
            ),
        )

def parallel_ilr_inference(nb_jobs=50, **kwargs):
    kwargs_list = []
    for n in range(nb_jobs):
        _kwargs = {'seed': kwargs['arguments'].seed,
                   'train_input': kwargs['train_input'][n],
                   'train_target': kwargs['train_target'][n],
                   'arguments': kwargs['arguments']}
        kwargs_list.append(_kwargs)

    with Pool(processes=min(nb_jobs, nb_cores),
              initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as p:
        res = p.map(_job, kwargs_list)

    return res

def process_archives(archive_links: List[str], target_dir: str, processes_number: int) -> None:
    """
    Download tar files and untar them to target directory
    :param archive_links: List of tar files links
    :param target_dir: Target directory
    :param processes_number: Number of processes which will be used in multiprocessing download
    :return: None
    """
    os.makedirs(target_dir, exist_ok=True)
    pool = Pool(processes=processes_number, initializer=tqdm.set_lock,
                initargs=(tqdm.get_lock(),))
    process_archive_with_arg = partial(process_archive, target_dir)
    for _ in tqdm(pool.imap(process_archive_with_arg, enumerate(archive_links)),
                  desc="process archives", total=len(archive_links), position=0):
        pass

def __init__(
    self,
    verbosity=["progress_bar", "print_results", "print_times"],
    distribution={
        "multiprocessing": {
            "initializer": tqdm.set_lock,
            "initargs": (tqdm.get_lock(),),
        }
    },
):
    if verbosity is False:
        verbosity = []

    self.verbosity = verbosity
    self.distribution = distribution

    self.search_ids = []
    self.process_infos = {}

    self.objFunc2results = {}
    self.search_id2results = {}

def __init__(
    self,
    verbosity=["progress_bar", "print_results", "print_times"],
    distribution={
        "multiprocessing": {
            "initializer": tqdm.set_lock,
            "initargs": (tqdm.get_lock(),),
        }
    },
    n_processes="auto",
):
    super().__init__()
    if verbosity is False:
        verbosity = []

    self.verbosity = verbosity
    self.distribution = distribution
    self.n_processes = n_processes

    self.search_ids = []
    self.process_infos = {}

    self.progress_boards = {}

def compute_by_window(imgs, func, window_size=16, step=2, dst_dtype=np.float32, n_worker=12):
    """
    Crop sub-regions of the input image(s) and return the results computed by `func`.

    Parameters
    ----------
    imgs : numpy.ndarray or tuple of numpy.ndarray
        Input image(s). When a tuple of images is given, the same region is
        cropped from every image before processing, so all images must have
        the same height and width.
    func : callable object
        Function applied to each cropped region; the region(s) are passed as
        its argument(s).
    window_size : int or tuple of int
        Size of the cropped window. An int crops the same size in both axes;
        a tuple(int, int) crops a different size per axis, in ndarray axis order.
    step : int or tuple of int
        Stride between crops. An int uses the same stride in both axes;
        a tuple(int, int) uses a different stride per axis, in ndarray axis order.
    dst_dtype : type, default numpy.float32
        Data type of the returned array.
    n_worker : int, default 12
        Number of worker processes.

    Returns
    -------
    numpy.ndarray
        Matrix of results, one entry per cropped window.
    """
    # TYPE ASSERTION
    TYPE_ASSERT(imgs, [np.ndarray, tuple])
    TYPE_ASSERT(window_size, [int, tuple])
    TYPE_ASSERT(step, [int, tuple])
    TYPE_ASSERT(dst_dtype, type)
    TYPE_ASSERT(n_worker, int)

    if isinstance(imgs, np.ndarray):
        imgs = tuple([imgs])

    for img in imgs:
        TYPE_ASSERT(img, np.ndarray)
    for i in range(len(imgs) - 1):
        SAME_SHAPE_ASSERT(imgs[i], imgs[i + 1])

    n_imgs = len(imgs)
    height, width = imgs[0].shape[:2]

    assert callable(func) and n_args(func) >= n_imgs, \
        "argument 'func' must be callable object which has {0} arguments at least. \n".format(n_imgs) + \
        " ( num of arguments of 'func' depends on argument 'imgs')"

    if isinstance(step, int):
        step = tuple([step] * 2)
    if isinstance(window_size, int):
        window_size = tuple([window_size] * 2)

    s_i, s_j = step
    w_w, w_h = window_size

    results_shape = ceil(height / s_i), ceil(width / s_j)

    # Add padding to input images
    eprint("Add padding ... ")
    imgs = [
        np.pad(
            img,
            # NOTE: the conditional is parenthesized so that 3-channel images
            #       keep the spatial padding and get zero padding on the
            #       channel axis (the unparenthesized original dropped it).
            pad_width=[
                tuple([w_w // 2]),
                tuple([w_h // 2]),
            ] + ([] if img.ndim == 2 else [tuple([0])]),
            mode="constant",
            constant_values=0
        )
        for img in imgs
    ]

    if n_worker == 1:
        results = np.ndarray(results_shape, dtype=dst_dtype)
        for ii, i in tqdm(enumerate(range(w_h // 2, height + w_h // 2, s_i)),
                          total=results_shape[0]):
            for jj, j in tqdm(enumerate(range(w_w // 2, width + w_w // 2, s_j)),
                              total=results_shape[1], leave=False):
                rois = [
                    img[
                        get_window_rect(
                            img.shape,
                            center=(j, i),
                            wnd_size=(w_w, w_h),
                            ret_type="slice"
                        )
                    ]
                    for img in imgs
                ]
                results[ii][jj] = func(*rois)

    else:
        global _func
        global _callee

        _func = func

        def _callee(_imgs, _func, _width, _s_j, _w_w, _n_loop):
            _worker_id = current_process()._identity[0]
            _desc = f"Worker #{_worker_id:3d}"
            _results = list()
            for jj, j in tqdm(enumerate(range(_w_w // 2, _width + _w_w // 2, _s_j)),
                              total=_n_loop, desc=_desc,
                              position=_worker_id, leave=False):
                _rois = [
                    # _roi[:, j:j + _w_w]
                    _roi[
                        get_window_rect(
                            _roi.shape,
                            center=(j, -1),
                            wnd_size=(_w_w, -1),
                            ret_type="slice"
                        )
                    ]
                    for _roi in _imgs
                ]
                _results.append(_func(*_rois))
            return _results

        progress_bar = tqdm(total=results_shape[0], position=0)

        def _update_progressbar(arg):
            progress_bar.update()

        cp = CustomPool()
        pool = cp.Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),))

        results = list()
        for ii, i in enumerate(range(w_h // 2, height + w_h // 2, s_i)):
            rois = [
                img[
                    get_window_rect(
                        img.shape,
                        center=(-1, i),
                        wnd_size=(-1, w_h),
                        ret_type="slice"
                    )
                ]
                for img in imgs
            ]
            results.append(
                pool.apply_async(
                    _callee,
                    args=(rois, func, width, s_j, w_h, results_shape[1]),
                    callback=_update_progressbar
                )
            )
        pool.close()
        pool.join()
        cp.update()

        results = np.array(
            [result.get() for result in results],
            dtype=dst_dtype
        )

    return results

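# A hedged usage sketch for `compute_by_window` above, not from the original
# source: it assumes the module's helpers (TYPE_ASSERT, get_window_rect, eprint,
# n_args, ...) are importable, uses `np.mean` as the per-window function purely
# for illustration, and stays on the serial path (n_worker=1) to avoid the
# CustomPool dependency.
import numpy as np

demo_img = np.random.randint(0, 256, size=(128, 128), dtype=np.uint8)
local_means = compute_by_window(demo_img, np.mean, window_size=16, step=4,
                                dst_dtype=np.float32, n_worker=1)
print(local_means.shape)  # (32, 32) for a 128x128 image with step=4
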
    # we think we know about other bars (currently only py3 threading)
    if n == 6:
        tqdm.write("n == 6 completed")


if __name__ == '__main__':
    freeze_support()  # for Windows support
    L = list(range(NUM_SUBITERS))[::-1]

    print("Manual nesting")
    for i in trange(16, desc="1"):
        for _ in trange(16, desc="2 @ %d" % i, leave=i % 2):
            sleep(0.01)

    print("Multi-processing")
    p = Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),))
    p.map(progresser, L)

    # unfortunately need ncols
    # to print spaces over leftover multi-processing bars (#796)
    with tqdm(leave=False) as t:
        ncols = t.ncols or 80
    print(("{msg:<{ncols}}").format(msg="Multi-threading", ncols=ncols))

    # explicitly set just threading lock for nonblocking progress
    tqdm.set_lock(RLock())
    with ThreadPoolExecutor() as p:
        progresser_thread = partial(
            progresser, write_safe=not PY2, blocking=False)
        p.map(progresser_thread, L)

def classify(self):
    """
    Classify edge pixels.

    - Template matching is used to find "endpoint" and "branch point" pixels.
    - Edge pixels that match no template are treated as "passing point" pixels.

    Returns
    -------
    numpy.ndarray
        Result of the edge-pixel classification.
    """
    self.check_image()

    BG, FG = self.BG, self.FG
    k_size = self.K_SIZE
    logger = self.logger
    img = self.img

    # Add padding of width 1
    img = np.pad(img, pad_width=k_size // 2, mode="constant", constant_values=BG)
    height, width = img.shape[:2]

    progress_bar = tqdm(total=height, position=0)

    def _update_progressbar(arg):
        progress_bar.update()

    cp = CustomPool()
    pool = cp.Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),))

    results = list()
    # Loop over all pixels (progress visualized with tqdm)
    for i in trange((k_size // 2), height - (k_size // 2), desc="Height", leave=False):
        roi = img[i:i + k_size, :]
        results.append(
            pool.apply_async(self._classify_pixel,
                             args=(roi,),
                             callback=_update_progressbar))
    pool.close()
    pool.join()
    cp.update()

    self.classified = np.array([result.get() for result in results],
                               dtype=self.classified.dtype)

    if logger:
        logger.logging_img(self.classified, "classified")
        logger.logging_img(self.get_as_image(), "classified_visualized")

    return self.classified

def render_parallel(num_jobs, scene, frame_window=None, **kwargs):
    import functools
    import multiprocessing
    from multiprocessing import RLock

    import h5py
    from tqdm import tqdm

    from gwpv.scene_configuration import animate, parse_as

    logger = logging.getLogger(__name__)

    # Infer frame window if needed
    if "FreezeTime" in scene["Animation"]:
        frame_window = (0, 1)
    elif frame_window is None:
        if "Crop" in scene["Animation"]:
            max_animation_length = (scene["Animation"]["Crop"][1] -
                                    scene["Animation"]["Crop"][0])
        else:
            waveform_file_and_subfile = parse_as.file_and_subfile(
                scene["Datasources"]["Waveform"])
            with h5py.File(waveform_file_and_subfile[0], "r") as waveform_file:
                waveform_times = waveform_file[
                    waveform_file_and_subfile[1]]["Y_l2_m2.dat"][:, 0]
                max_animation_length = waveform_times[-1] - waveform_times[0]
            logger.debug(
                f"Inferred max. animation length {max_animation_length}M"
                " from waveform data.")
        frame_window = (
            0,
            animate.num_frames(
                max_animation_length=max_animation_length,
                animation_speed=scene["Animation"]["Speed"],
                frame_rate=scene["Animation"]["FrameRate"],
            ),
        )
        logger.debug(f"Inferred total frame window: {frame_window}")

    num_frames = frame_window[1] - frame_window[0]
    frames_per_job = int(num_frames / num_jobs)
    extra_frames = num_frames % num_jobs
    logger.debug(f"Using {num_jobs} jobs with {frames_per_job} frames per job"
                 f" ({extra_frames} jobs render an additional frame).")

    frame_windows = []
    distributed_frames = frame_window[0]
    for i in range(num_jobs):
        frames_this_job = frames_per_job + (1 if i < extra_frames else 0)
        frame_windows.append(
            (distributed_frames, distributed_frames + frames_this_job))
        distributed_frames += frames_this_job
    logger.debug(f"Frame windows: {frame_windows}")

    tqdm.set_lock(RLock())
    pool = multiprocessing.Pool(num_jobs,
                                initializer=tqdm.set_lock,
                                initargs=(tqdm.get_lock(),))
    from gwpv.render.frames import _render_frame_window
    render_frame_window = functools.partial(_render_frame_window,
                                            scene=scene,
                                            **kwargs)
    pool.starmap(render_frame_window, enumerate(frame_windows))

# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import pickle, sys, random, time, logging, argparse
from fnmatch import fnmatch
from copy import deepcopy
from typing import List, Tuple, Optional, Text, FrozenSet
from abc import abstractmethod
from operator import itemgetter
from collections import defaultdict
from itertools import chain

from tqdm import tqdm
# work around pypy bug https://bitbucket.org/pypy/pypy/issues/2953/deadlock
tqdm.get_lock().locks = []
import yaml

from .layout import defaultLayouts, ButtonCombination, Layer, KeyboardLayout, GenericLayout
from .carpalx import Carpalx, models, ModelParams
from .writer import Writer
from .util import first
from .keyboard import defaultKeyboards, LetterButton


class Annealer:
    """
    Simulated annealing. Override .mutate() to suit your needs.

    Uses exponential cooling (10^(-progress*factor))

def _initialize(proc_name):
    tqdm.set_lock(tqdm.get_lock())
    current_process().name = proc_name

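# A hedged sketch (not from the original source) of how `_initialize` above
# could be wired in as a Pool initializer: each worker re-registers the tqdm
# lock and gets a readable process name. `work` is a hypothetical task function,
# and `_initialize` is assumed importable with its own tqdm/current_process imports.
from multiprocessing import Pool, current_process
from tqdm import tqdm


def work(i):
    return current_process().name, i * i


if __name__ == "__main__":
    with Pool(processes=2, initializer=_initialize, initargs=("tqdm-worker",)) as pool:
        print(pool.map(work, range(4)))
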
def log_progress(items, prefix=None, total=None):
    # https://github.com/tqdm/tqdm/issues/461#issuecomment-334343230
    tqdm.get_lock().locks = []
    return tqdm(items, desc=prefix, total=total)

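# A minimal usage sketch for `log_progress` above (an assumption, not part of
# the original source): wrap any iterable to get a tqdm bar with a prefix;
# `total` is only needed when the iterable has no __len__ (e.g. a generator).
lines = (str(i) for i in range(1000))
for line in log_progress(lines, prefix="Parsing", total=1000):
    pass
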
if __name__ == "__main__":
    input_csv = csv.DictReader(open(sys.argv[1], 'r', encoding='utf-8', errors='ignore'),
                               delimiter=',')
    output_csv = csv.DictWriter(open(sys.argv[2], "w+"),
                                delimiter=',', fieldnames=OUTPUT_FIELDS)
    output_csv.writerow(dict((fn, fn) for fn in OUTPUT_FIELDS))

    # Gather all rows into memory
    all_input_rows = [input_row for input_row in input_csv]

    # Split rows into NUM_CORES chunks for parallel processing
    input_chunks = numpy.array_split(numpy.array(all_input_rows), NUM_CORES)
    input_chunks_with_index = [(index, chunk) for index, chunk in enumerate(input_chunks)]

    # Create pool of workers
    pool = mp.Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),),
                   processes=NUM_CORES)

    # Apply transformation in parallel
    print("Starting transformation with %s workers..." % NUM_CORES)
    output_chunks = pool.starmap(batch_tranform_to_rayyan, input_chunks_with_index)

    # Wrap up workers
    pool.close()
    pool.join()

    # Write output to file
    for chunk in output_chunks:
        for row in chunk:
            output_csv.writerow(row)
    print("Complete.")

def find_color_threshold_in_hsv(self, img, ground_truth, precision=10):
    """
    Search for color thresholds in the HSV color space.

    - Convert RGB to HSV.
    - Threshold each HSV channel and combine the results.
    - Evaluate accuracy against the ground-truth data.

    Parameters
    ----------
    img : numpy.ndarray
        Input image (8-bit RGB color).
    ground_truth : numpy.ndarray
        Ground-truth data (1-bit).
    precision : int
        Precision of the threshold search.

    Returns
    -------
    reasonable_params : dict
        Thresholds that give the highest F-score.
    result : numpy.ndarray
        Thresholded image for those thresholds (1-bit binary image).

    Notes
    -----
    `ground_truth`:
        - 1-bit (bool) binary image
        - black: background, white: damaged region
    `precision`:
        - For precision=N, each of the H, S, and V channels is split
          into 2N percentile bins.
    """
    global _worker_find_color_threshold_in_hsv

    # Worker method executed in parallel
    @worker_exception_raisable
    def _worker_find_color_threshold_in_hsv(_img, _masked, _q_h, _q_s):
        # Values used in tqdm
        _worker_id = current_process()._identity[0]
        _desc = f"Worker #{_worker_id:3d}"

        # Unpack arguments
        _q_h_low, _q_h_high = _q_h
        _q_s_low, _q_s_high = _q_s

        # Split image into channels
        _img_h, _img_s, _img_v = [_img[:, :, i] for i in range(3)]
        _masked_h, _masked_s, _masked_v = [_masked[:, i] for i in range(3)]

        # Initialize variables
        reasonable_params = {
            "Score": {
                "F Score": -1,
            },
            "Range": -1
        }

        # Find thresholds
        for _q_v_low, _q_v_high in tqdm(
                list(product(np.linspace(50 / precision, 50, precision), repeat=2)),
                desc=_desc, position=_worker_id, leave=False):

            # Generate result
            _h_min, _h_max = self._in_range_percentile(_masked_h, (_q_h_low, _q_h_high))
            _s_min, _s_max = self._in_range_percentile(_masked_s, (_q_s_low, _q_s_high))
            _v_min, _v_max = self._in_range_percentile(_masked_v, (_q_v_low, _q_v_high))

            _result = (((_h_min <= _img_h) & (_img_h <= _h_max)) &
                       ((_s_min <= _img_s) & (_img_s <= _s_max)) &
                       ((_v_min <= _img_v) & (_img_v <= _v_max)))

            # Calculate score
            _cm, _metrics = evaluation_by_confusion_matrix(_result, ground_truth)

            # Update reasonable_params
            if _metrics["F Score"] > reasonable_params["Score"]["F Score"]:
                reasonable_params = {
                    "Score": _metrics,
                    "Confusion Matrix": _cm,
                    "Range": {
                        "H": (_h_min, _h_max, _q_h_low, _q_h_high),
                        "S": (_s_min, _s_max, _q_s_low, _q_s_high),
                        "V": (_v_min, _v_max, _q_v_low, _q_v_high),
                    }
                }

        return reasonable_params

    # Check arguments
    NDARRAY_ASSERT(img, ndim=3, dtype=np.uint8)
    NDARRAY_ASSERT(ground_truth, ndim=2, dtype=np.bool)
    SAME_SHAPE_ASSERT(img, ground_truth, ignore_ndim=True)

    # Convert RGB -> HSV
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # `masked`: `img` masked by `ground_truth`
    masked = img[ground_truth]

    # Percentile split
    Q = list(product(np.linspace(50 / precision, 50, precision), repeat=4))

    # `progress_bar`: whole progress bar
    progress_bar = tqdm(total=len(Q), position=0)

    def _update_progressbar(arg):
        progress_bar.update()

    # Initialize process pool
    cp = CustomPool()
    pool = cp.Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),))

    results = list()

    # Multi-processing!
    for q_h_low, q_h_high, q_s_low, q_s_high in Q:
        results.append(
            pool.apply_async(_worker_find_color_threshold_in_hsv,
                             args=(img, masked, (q_h_low, q_h_high), (q_s_low, q_s_high)),
                             callback=_update_progressbar))
    pool.close()
    pool.join()
    cp.update()

    # Resolve results
    try:
        results = [result.get() for result in results]
    except Exception as e:
        print(e)

    # Get result whose F-Score is max in results
    reasonable_params = max(results, key=lambda e: e["Score"]["F Score"])

    img_h, img_s, img_v = [img[:, :, i] for i in range(3)]

    h_min, h_max, _, _ = reasonable_params["Range"]["H"]
    s_min, s_max, _, _ = reasonable_params["Range"]["S"]
    v_min, v_max, _, _ = reasonable_params["Range"]["V"]

    # Generate image using reasonable thresholds
    result = (((h_min <= img_h) & (img_h <= h_max)) &
              ((s_min <= img_s) & (img_s <= s_max)) &
              ((v_min <= img_v) & (img_v <= v_max)))

    # Logging
    if self.logger:
        self.logger.logging_dict(reasonable_params, "color_thresholds_in_hsv",
                                 sub_path=self.logger_sub_path)
        self.logger.logging_img(result, "meanshift_thresholded",
                                sub_path=self.logger_sub_path)

    return reasonable_params, result

def process_collection(collection):
    fid = collection["foreign_id"]
    fid = fid.replace("/", "")
    fname = f"./dataset_components/{fid}.json"
    if os.path.exists(fname):
        return
    try:
        components = calculate_components(collection)
    except AlephException as e:
        print(f"Aleph Error: {fid}: {e}")
        return
    with open(fname, "w+") as fd:
        data = {
            "components_histogram": dict(components),
            "collection": collection,
        }
        fd.write(json.dumps(data))


if __name__ == "__main__":
    init_aleph()
    collections = api.filter_collections("*")
    N = collections.result["total"]
    tqdm.set_lock(mp.RLock())
    with mp.Pool(processes=4, initializer=init_aleph,
                 initargs=(tqdm.get_lock(),)) as p:
        results = p.imap_unordered(process_collection, collections, chunksize=32)
        for _ in tqdm(results, total=N, position=0):
            pass

def find_ms_params(n):
    file_name = f"aerial_roi{n}.png"

    src = imread_with_error(
        join(ROOT_DIR_SRC, file_name)
    )
    ans = imread_with_error(
        join(ROOT_DIR_ANS, file_name)
    )

    ms_params = sum([
        [
            {"spatial_radius": sp, "range_radius": sr, "min_density": 0}
            for sr in np.arange(SR_RANGE[0], SR_RANGE[0] + SR_RANGE[1], SR_RANGE[2])
        ]
        for sp in np.arange(SP_RANGE[0], SP_RANGE[0] + SP_RANGE[1], SP_RANGE[2])
    ], [])

    progress_bar = tqdm(total=len(ms_params), position=0)

    def _update_progressbar(arg):
        progress_bar.update()

    cp = CustomPool()
    pool = cp.Pool(n_process=6, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),))

    results = list()
    for params in ms_params:
        results.append(
            pool.apply_async(
                func_worker,
                args=(src,),
                kwds=params,
                callback=_update_progressbar
            )
        )
    pool.close()
    pool.join()
    cp.update()

    results = [result.get() for result in results]
    results = sorted(
        [
            (sp, sr, np.sum(np.abs(segmented - ans)))
            for segmented, sp, sr in results
        ],
        key=lambda e: e[0]
    )

    pprint(results)

    with open(f"tmp/find_ms_params_{n}.csv", "wt") as f:
        f.write("spatial_radius, range_radius, n_diffs\n")
        for result in results:
            f.write(", ".join([str(x) for x in result]) + "\n")

    return results