def run(self):
    pool = ThreadPool(processes=self._worker_number)
    # results are intentionally discarded; the pool keeps dispatching tasks
    # until the iterable hits its None sentinel
    if self._batch > 1:
        pool.imap_unordered(func=self._func_wrap_batch,
                            iterable=iter(self._forever_get_batch, None))
    else:
        pool.imap_unordered(func=self._func_wrap,
                            iterable=iter(self._forever_get, None))
    while not self._stop_event.is_set():
        time.sleep(0.01)
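# A minimal, self-contained sketch (names invented for illustration) of the
# iter(callable, sentinel) idiom used above: iter(get_next, None) keeps
# calling the blocking getter and stops once it returns the None sentinel,
# so imap_unordered drains the producer until shutdown.
from multiprocessing.pool import ThreadPool
from queue import Queue

work_queue = Queue()
for item in (1, 2, 3):
    work_queue.put(item)
work_queue.put(None)  # sentinel: terminates the iteration

def get_next():
    return work_queue.get()  # blocking getter

pool = ThreadPool(processes=2)
for result in pool.imap_unordered(lambda x: x * x, iter(get_next, None)):
    print(result)
pool.close()
pool.join()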
class SiteThreadChecker(SiteChecker):
    pool_size_key = "thread_pool_size"

    def __init__(self, *args, thread_pool_size=1, **kwargs):
        # super(SiteThreadChecker, self).__init__(*args, **kwargs)
        SiteChecker.__init__(self, *args,
                             output_buff_size=thread_pool_size * 50, **kwargs)
        self.max_thread = thread_pool_size
        LinkChecker.max_http_connection = self.max_thread
        LinkChecker.max_pool_size = self.max_thread
        # self.pool = multiprocessing.Pool(processes=self.max_thread, maxtasksperchild=1)
        self.pool = ThreadPool(processes=self.max_thread)
        self._set_task_control_max(self.max_thread)

    @staticmethod
    def get_input_parameter(full_link: str, max_page: int, max_level: int,
                            output_queue, pool_size: int):
        temp = SiteChecker.get_input_parameter_base(full_link, max_page,
                                                    max_level, output_queue)
        temp.update({SiteThreadChecker.pool_size_key: pool_size})
        return temp

    def additional_reset(self):
        if self.pool is not None:
            self.pool.terminate()
        self.pool = ThreadPool(processes=self.max_thread)

    def addtional_clear(self):  # (sic) spelling kept to match the base-class hook
        if self.pool is not None:
            self.pool.terminate()

    def stop(self):
        try:
            self.data_source.set_continue_lock(False)
            self.pool.terminate()
        except Exception:
            pass
        super(SiteThreadChecker, self).stop()

    def begin_crawl(self, level=0):
        try:
            self.pool.imap_unordered(PageChecker.crawl_page_for_iter, self.data_source)
            while self.data_source.can_continue():
                time.sleep(0.1)
        except Exception as ex:
            msg = "begin_crawl() " + str(self.get_site_info())
            ErrorLogger.log_error("SiteThreadChecker", ex, msg)
def main():
    assert sys.version_info[0] == 3

    moduleset_versions = get_moduleset_versions()

    pool = ThreadPool(20)
    pool_iter = pool.imap_unordered(_fetch_version, moduleset_versions.keys())

    arch_versions = {}
    for some_versions in pool_iter:
        arch_versions.update(some_versions)

    for name, version in sorted(moduleset_versions.items()):
        arch_name = fix_name(name)
        if arch_name in arch_versions:
            arch_version, arch_url = arch_versions[arch_name]
            arch_version = arch_version.split("+", 1)[0]
            if arch_name == "readline":
                arch_version = ".".join(arch_version.split(".")[:2])
        else:
            arch_version = "???"
            arch_url = ""
        if is_maybe_newer(arch_version, version):
            print("%-30s %-20s %-20s %s" % (name, version, arch_version, arch_url))
def main(argv):
    """Go Main Go"""
    scenario = int(argv[1])
    sdf = load_scenarios()
    queue = realtime_run(sdf.loc[scenario], scenario)
    pool = ThreadPool()  # defaults to cpu-count
    sz = len(queue)
    failures = 0

    def _run(row):
        """Run!"""
        wr = WeppRun(row[0], row[1], row[2], scenario)
        return wr.run()

    sts00 = datetime.datetime.now()
    sts0 = datetime.datetime.now()
    for i, res in enumerate(pool.imap_unordered(_run, queue), 1):
        if not res:
            failures += 1
            if failures > 100:
                print("ABORT due to more than 100 failures...")
                sys.exit(10)
        if i > 0 and i % 5000 == 0:
            delta00 = datetime.datetime.now() - sts00
            delta0 = datetime.datetime.now() - sts0
            speed00 = i / delta00.total_seconds()
            speed0 = 5000 / delta0.total_seconds()
            remaining = ((sz - i) / speed00) / 3600.
            sts0 = datetime.datetime.now()
            print((
                '%5.2fh Processed %6s/%6s [inst/tot %.2f/%.2f rps] '
                'remaining: %5.2fh'
            ) % (delta00.total_seconds() / 3600., i, sz,
                 speed0, speed00, remaining))
def test_GIL():
    """tests running of multiple queries in a threadpool"""
    vertices, triangles = triangle_soup(10000, (-5, 5))
    mesh0 = pyopcode.Model(vertices, triangles)
    vertices, triangles = triangle_soup(10000, (-5, 5))
    mesh1 = pyopcode.Model(vertices, triangles)
    col = pyopcode.Collision(mesh0, mesh1)

    identity = np.identity(4).astype(np.float32)

    def transform_generator():
        """generate affine rotation matrices"""
        np.random.seed(42)
        for i in range(100):
            r = np.random.normal(size=(3, 3))
            u, _, v = np.linalg.svd(r)
            r = u.dot(np.eye(*r.shape)).dot(v)
            a = identity.copy()
            a[:3, :3] = r
            yield a

    from multiprocessing.pool import ThreadPool
    pool = ThreadPool(processes=4)
    results = pool.imap_unordered(
        lambda affine: col.query(affine, identity), transform_generator())

    import time
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    for r in results:
        print(len(r))
    print(time.perf_counter() - start)
def get_for_genres(genres):
    genres = set(genres)
    playlists = {}
    new_genres = set()

    for page in xrange(5):
        args = []
        for g in genres:
            args.append((g, page))

        try:
            pool = ThreadPool(PROCESSES)
            pfunc = parse_page
            for i, res in enumerate(pool.imap_unordered(pfunc, args)):
                genre, page, pl, found = res
                print "%d/%d" % (i + 1, len(args))
                playlists.update(pl)
                new_genres |= found
                if not pl:
                    genres.remove(genre)
        except Exception as e:
            print e
            return playlists, []
        finally:
            pool.terminate()
            pool.join()

    return playlists, new_genres
def UrlMode(corpus, request_parallelism):
    """Finds Wayback Machine URLs and writes them to disk.

    Args:
        corpus: A corpus.
        request_parallelism: The number of concurrent requests.
    """
    for dataset in datasets:
        print "Finding Wayback Machine URLs for the %s set:" % dataset
        old_urls_filename = "%s/%s_urls.txt" % (corpus, dataset)
        new_urls_filename = "%s/wayback_%s_urls.txt" % (corpus, dataset)
        urls = ReadMultipleUrls(old_urls_filename)

        p = ThreadPool(request_parallelism)
        results = p.imap_unordered(WaybackUrl, urls)

        progress_bar = ProgressBar(len(urls))
        new_urls = []
        for result in results:
            if result:
                new_urls.append(result)
            progress_bar.Increment()

        WriteUrls(new_urls_filename, new_urls)
def collect_stats(args):
    bots_json = swarming_query('bots', '--limit', '10000')
    if 'error' in bots_json:
        return bots_json
    bots = bots_json.get('items', [])
    stats = {
        'bots_count': len(bots),
        'bots': {},
    }
    logging.info('Found %d bots; collecting tasks.', stats['bots_count'])

    pool = ThreadPool(100)
    count = 0

    def func(bot):
        return process_bot(args, bot)

    try:
        for bot_id, bot_results in pool.imap_unordered(func, bots):
            count += 1
            stats['bots'][bot_id] = bot_results
            logging.info('%4d of %4d (%2.0f%%) of bots processed',
                         count, stats['bots_count'],
                         count * 100 / stats['bots_count'])
    except KeyboardInterrupt:
        pass

    return stats
class parallel_map(collections.Iterable):

    def __init__(self, pool_size, function, *iterables):
        if not isinstance(pool_size, numbers.Integral):
            raise TypeError('pool_size must be an integer, not ' +
                            repr(pool_size))
        elif not callable(function):
            raise TypeError('function must be callable, not ' +
                            repr(function))
        elif not iterables:
            raise TypeError('missing iterable')
        self.pool = ThreadPool(pool_size)
        self.function = function
        self.results = self.pool.imap_unordered(self.map_function,
                                                zip(*iterables))

    def map_function(self, args):
        try:
            value = self.function(*args)
        except Exception:
            return False, sys.exc_info()
        return True, value

    def __iter__(self):
        errors = []
        for success, value in self.results:
            if success:
                yield value
            else:
                errors.append(value)
        self.pool.close()
        self.pool.join()
        for error in errors:
            # Python 2 re-raise preserving the original traceback
            exec('raise error[1], None, error[2]')
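# Hypothetical usage of parallel_map (not from the original source): values
# are yielded in completion order, and any worker exception is re-raised
# only after the successful results have been consumed.
for value in parallel_map(4, lambda x: x * x, range(10)):
    print(value)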
def _fit(self, dataset):
    est = self.getOrDefault(self.estimator)
    epm = self.getOrDefault(self.estimatorParamMaps)
    numModels = len(epm)
    eva = self.getOrDefault(self.evaluator)
    tRatio = self.getOrDefault(self.trainRatio)
    seed = self.getOrDefault(self.seed)
    randCol = self.uid + "_rand"
    df = dataset.select("*", rand(seed).alias(randCol))
    condition = (df[randCol] >= tRatio)
    validation = df.filter(condition).cache()
    train = df.filter(~condition).cache()

    subModels = None
    collectSubModelsParam = self.getCollectSubModels()
    if collectSubModelsParam:
        subModels = [None for i in range(numModels)]

    tasks = _parallelFitTasks(est, train, eva, validation, epm,
                              collectSubModelsParam)
    pool = ThreadPool(processes=min(self.getParallelism(), numModels))
    metrics = [None] * numModels
    for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks):
        metrics[j] = metric
        if collectSubModelsParam:
            subModels[j] = subModel

    train.unpersist()
    validation.unpersist()

    if eva.isLargerBetter():
        bestIndex = np.argmax(metrics)
    else:
        bestIndex = np.argmin(metrics)
    bestModel = est.fit(dataset, epm[bestIndex])
    return self._copyValues(TrainValidationSplitModel(bestModel, metrics, subModels))
def get_first_result_from_threads(calls):
    calls = list(enumerate(calls))

    def run_func(call):
        i, call = call
        func = call[0]
        args = call[1] if len(call) > 1 else []
        kwargs = call[2] if len(call) > 2 else {}
        try:
            return i, func(*args, **kwargs)
        except Exception as e:
            return i, e

    pool = ThreadPool(processes=len(calls))
    result = pool.imap_unordered(run_func, calls).next()
    for thread in pool._pool:
        # via http://stackoverflow.com/a/15274929
        if not thread.isAlive():
            continue
        exc = ctypes.py_object(SystemExit)
        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
            ctypes.c_long(thread.ident), exc)
        if res == 0:
            raise ValueError("nonexistent thread id")
        elif res > 1:
            # "if it returns a number greater than one, you're in trouble,
            # and you should call it again with exc=NULL to revert the effect"
            ctypes.pythonapi.PyThreadState_SetAsyncExc(thread.ident, None)
            raise SystemError("PyThreadState_SetAsyncExc failed")
    return result
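# Hypothetical call (invented for illustration) showing the expected shape of
# `calls`: each entry is (func,), (func, args) or (func, args, kwargs), and
# the return value is (index, result) for whichever call finishes first.
import time
index, value = get_first_result_from_threads([
    (time.sleep, [5]),   # slow call, loses the race
    (lambda: 42,),       # fast call, wins
])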
def convert2(names, name_func, nprocs=4):
    pool = ThreadPool(processes=nprocs)

    def converter(in_name):
        out_name = name_func(in_name)
        check_call(['convert', in_name, out_name])
        return out_name

    return pool.imap_unordered(converter, names)
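# Because convert2 hands back the imap_unordered iterator, callers see each
# output name as soon as its `convert` subprocess finishes rather than after
# the whole batch completes. Hypothetical usage (file names invented):
for out_name in convert2(['a.png', 'b.png'], lambda n: n[:-4] + '.jpg'):
    print('wrote', out_name)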
def process_threaded(img, filters, threadn=8):
    accum = np.zeros_like(img)

    def f(kern):
        return cv2.filter2D(img, cv2.CV_8UC3, kern)

    pool = ThreadPool(processes=threadn)
    for fimg in pool.imap_unordered(f, filters):
        np.maximum(accum, fimg, accum)
    return accum
def run_jobs(self, f, jobs):
    if self.usecloud:
        jids = cloud.map(f, jobs, _env=self.cloud_env, _profile=True,
                         _depends_on=self.preprocess_job)
        ires = cloud.iresult(jids)
    else:
        pool = ThreadPool(processes=cv2.getNumberOfCPUs())
        ires = pool.imap_unordered(f, jobs)
    return ires
def main(api_url, api_access_token):
    client = dmapiclient.DataAPIClient(api_url, api_access_token)

    pool = ThreadPool(10)
    count = 1
    for i in pool.imap_unordered(update(client),
                                 enumerate(client.find_services_iter())):
        count += i
        if count % 1000 == 0:
            print("** {}".format(count))
def check_migration(client, stage, framework_slug, draft_bucket, documents_bucket):
    do_check_draft_and_service = functools.partial(check_draft_and_service, client)
    pool = ThreadPool(10)
    drafts = pool.imap_unordered(
        do_check_draft_and_service, find_drafts(client, framework_slug))
    for draft in drafts:
        pass  # drain the iterator to block until every draft has been checked
def f(n, num_processes=None):
    if num_processes is None:
        num_processes = mp.cpu_count() - 1
    base_index = len(kwrds)
    tp = ThreadPool(num_processes)
    im = tp.imap_unordered(f_mp, range(base_index, n + 1))
    return sum(v for v in im if v is not None)
def _run_matches(matches, name, num_processes=NUM_PROCS, debug=False):
    results = []
    pool = Pool(1) if debug else Pool(num_processes)
    for result in pool.imap_unordered(play, matches):
        print("+" if result[0].name == name else '-', end="")
        results.append(result)
    print()
    return results
def download(self):
    tp = ThreadPool(100)
    result = tp.imap_unordered(self.__worker, self.items)
    for item in result:
        ok, book_output_dir = item
        if not ok:
            logger = Log()
            logger.write_error("Intro: " + book_output_dir + '\n')
    tp.terminate()
def _threaded_read(self):
    elements = [idx for idx in range(1, len(self.annotation_db))]
    pool = ThreadPool(processes=4)

    with tqdm.tqdm(total=len(elements), disable=not is_master()) as pbar:
        for i, _ in enumerate(pool.imap_unordered(self._fill_cache, elements)):
            if i % 100 == 0:
                pbar.update(100)
    pool.close()
def maybe_convert_to_wav(base_dir):
    roots = list(os.walk(base_dir))
    print("Converting and joining source audio files...")
    bar = progressbar.ProgressBar(max_value=len(roots), widgets=SIMPLE_BAR)
    tp = ThreadPool()
    for _ in bar(tp.imap_unordered(maybe_convert_one_to_wav, roots)):
        pass
    tp.close()
    tp.join()
def _run_matches(matches, name, num_processes=NUM_PROCS, debug=False):
    results = []
    pool = Pool(1) if debug else Pool(num_processes)
    print("Running {} games using {}:".format(len(matches), heuristic_type))
    for result in pool.imap_unordered(play, matches):
        print("+" if result[0].name == name else '-', end="")
        results.append(result)
    print()
    return results
def _run_matches(matches, name, num_processes=NUM_PROCS):
    results = []
    pool = Pool(num_processes)
    print("Running {} games:".format(len(matches)))
    for result in pool.imap_unordered(play, matches):
        print("+" if result[0].name == name else '-', end="")
        results.append(result)
    print()
    return results
def initialize(self):
    """Initialize the ARP spoofer"""
    self.victim = (self.config['to_ip'].value,
                   getmacbyip(self.config['to_ip'].value))

    if self.config['from_ip'].value is None:
        # Enumerate all IPs in network
        Msg("Gathering information on network...this may take a minute")
        thread_pool = ThreadPool(processes=25)
        ip_whitelist = {self.victim[0], self.local[0]}
        for ip, mac in thread_pool.imap_unordered(
                arp.get_mac_address_for_ip,
                (ip for ip in self.enumerate_all_ips_in_network(
                    self.config['to_ip'].value, self.get_iface_netmask()))):
            if ip in ip_whitelist:
                continue
            if mac is None or mac == "ff:ff:ff:ff:ff:ff":
                # no mac for you, next!
                continue
            self.targets[ip] = mac
        # todo Consider adding an upper limit on hosts being poisoned
    elif "/" in self.config['from_ip'].value:
        source_ip, netmask = self.cidr_to_ip_and_netmask(self.config['from_ip'].value)
        # Enumerate all IPs in network
        Msg("Gathering information on network...this may take a minute")
        thread_pool = ThreadPool(processes=25)
        ip_whitelist = {self.victim[0], self.local[0]}
        for ip, mac in thread_pool.imap_unordered(
                arp.get_mac_address_for_ip,
                (ip for ip in self.enumerate_all_ips_in_network(source_ip, netmask))):
            if ip in ip_whitelist:
                continue
            if mac is None or mac == "ff:ff:ff:ff:ff:ff":
                # no mac for you, next!
                continue
            self.targets[ip] = mac
        # todo Consider adding an upper limit on hosts being poisoned
    else:
        self.targets[self.config['from_ip'].value] = getmacbyip(self.config['from_ip'].value)

    Msg("Initializing ARP poison...")
    return self.initialize_post_spoof()
def _run_ddls_with_invalidation(self, db, sync_ddl=False):
    """Test INVALIDATE METADATA with concurrent DDLs to see if any queries hang"""
    test_self = self

    class ThreadLocalClient(threading.local):
        def __init__(self):
            self.client = test_self.create_impala_client()
            if sync_ddl:
                self.client.set_configuration_option('sync_ddl', 'true')

    pool = ThreadPool(processes=8)
    tls = ThreadLocalClient()

    def run_ddls(i):
        tbl_name = db + ".test_" + str(i)
        for query_tmpl in [
            # Create a partitioned and unpartitioned table
            "create table %s (i int)",
            "create table %s_part (i int) partitioned by (j int)",
            # Below queries could fail if running with invalidate metadata concurrently
            "alter table %s_part add partition (j=1)",
            "alter table %s_part add partition (j=2)",
            "invalidate metadata %s_part",
            "refresh %s",
            "refresh %s_part",
            "insert overwrite table %s select int_col from functional.alltypestiny",
            "insert overwrite table %s_part partition(j=1) values (1), (2), (3), (4), (5)",
            "insert overwrite table %s_part partition(j=2) values (1), (2), (3), (4), (5)"
        ]:
            try:
                query = query_tmpl % tbl_name
                # TODO(IMPALA-9123): Timeout logic here does not work for DDLs since
                # they are usually stuck in CREATED state and execute_async() won't
                # return. We finally use timeout in pytest.mark.timeout() but it's not
                # precise. We should find a more elegant way to detect timeout of DDLs.
                handle = tls.client.execute_async(query)
                is_finished = tls.client.wait_for_finished_timeout(handle, timeout=60)
                assert is_finished, "Query timeout(60s): " + query
                tls.client.close_query(handle)
            except ImpalaBeeswaxException as e:
                # Could raise exception when running with INVALIDATE METADATA
                assert TestConcurrentDdls.is_acceptable_error(str(e), sync_ddl), str(e)
        # TODO(IMPALA-9123): Detect hangs here instead of using pytest.mark.timeout()
        self.execute_query_expect_success(tls.client, "invalidate metadata")

    # Run DDLs in single thread first. Some bugs causing DDL hangs can be hidden
    # when run with concurrent DDLs.
    run_ddls(0)

    # Run DDLs with invalidate metadata in parallel
    NUM_ITERS = 16
    for i in pool.imap_unordered(run_ddls, xrange(1, NUM_ITERS + 1)):
        pass
def main(compilation_db_path, source_files, verbose, formatter, iwyu_args):
    """ Entry point. """
    # Canonicalize compilation database path
    if os.path.isdir(compilation_db_path):
        compilation_db_path = os.path.join(compilation_db_path,
                                           'compile_commands.json')
    compilation_db_path = os.path.realpath(compilation_db_path)
    if not os.path.isfile(compilation_db_path):
        print('ERROR: No such file or directory: \'%s\'' % compilation_db_path)
        return 1

    # Read compilation db from disk
    with open(compilation_db_path, 'r') as fileobj:
        compilation_db = json.load(fileobj)

    # expand symlinks
    for entry in compilation_db:
        entry['file'] = os.path.realpath(entry['file'])

    # Cross-reference source files with compilation database
    source_files = [os.path.realpath(s) for s in source_files]
    if not source_files:
        # No source files specified, analyze entire compilation database
        entries = compilation_db
    else:
        # Source files specified, analyze the ones appearing in compilation db,
        # warn for the rest.
        entries = []
        for source in source_files:
            matches = [e for e in compilation_db if e['file'] == source]
            if matches:
                entries.extend(matches)
            else:
                print('WARNING: \'%s\' not found in compilation database.' % source)

    # Run analysis
    def run_iwyu_task(entry):
        cwd, compile_command = entry['directory'], entry['command']
        compile_command = workaround_parent_dir_relative_includes(
            cwd, compile_command)
        return run_iwyu(cwd, compile_command, iwyu_args, verbose)

    pool = ThreadPool(multiprocessing.cpu_count())
    try:
        for iwyu_output in pool.imap_unordered(run_iwyu_task, entries):
            formatter(iwyu_output)
    except KeyboardInterrupt:
        sys.exit(1)
    except OSError as why:
        print('ERROR: Failed to launch include-what-you-use: %s' % why)
        return 1
    finally:
        pool.terminate()
        pool.join()

    return 0
def fetch_input_sizes(args, slurm_jobs):
    data_path = os.path.join(args.cache_folder, 'speed_data_sizes.csv')
    try:
        with open(data_path) as f:
            reader = DictReader(f)
            cache = {int(row['run_id']): float(row['MB']) for row in reader}
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        cache = {}
    session = KiveAPI(args.kive_server)
    session.login(args.kive_user, args.kive_password)
    fetcher = partial(fetch_input_size, cache=cache, kive_session=session)
    pool = ThreadPool()
    job_count = len(slurm_jobs)
    fetch_count = 0
    failed_run_ids = set()
    last_error = None
    data_file = None
    data_writer = None
    input_sizes = {}
    try:
        for i, (run_id, input_size, is_cached, error_message) in enumerate(
                pool.imap_unordered(fetcher, slurm_jobs, chunksize=10)):
            if error_message is not None:
                last_error = error_message
                failed_run_ids.add(run_id)
            if not is_cached:
                if data_file is None:
                    data_file = open(data_path, 'w')
                    data_writer = DictWriter(data_file, ['run_id', 'MB'])
                    data_writer.writeheader()
                    for old_run_id, old_input_size in input_sizes.items():
                        data_writer.writerow({'run_id': old_run_id,
                                              'MB': old_input_size})
                if fetch_count % 10000 == 0:
                    print('Fetched {} runs after scanning {} of {} at {}.'.format(
                        fetch_count, i, job_count, datetime.now()))
                fetch_count += 1
            input_sizes[run_id] = input_size
            if data_writer:
                data_writer.writerow({'run_id': run_id, 'MB': input_size})
    finally:
        if data_file is not None:
            data_file.close()
    if failed_run_ids:
        message = 'Failed to fetch run ids: {}\n Caused by {}'.format(
            ', '.join(str(run_id) for run_id in sorted(failed_run_ids)),
            last_error)
        raise RuntimeError(message)
    return input_sizes
def main():
    forkme.fork(4)
    pool = ThreadPool(8)

    engine = wait_engine()
    Session.configure(bind=engine)
    Base.metadata.create_all(engine)

    for _ in pool.imap_unordered(data_generator, range(100)):
        pass
def update(self):
    self._db_dir_check_existence()
    new_hashes = self._get_online_hashes()
    old_hashes = self._get_local_hashes()

    old_to_delete = set(old_hashes) - set(new_hashes)
    for old_hash in old_to_delete:
        os.remove(old_hashes[old_hash])

    new_to_download = set(new_hashes) - set(old_hashes)
    # for new_hash in new_to_download:
    #     self._download_db_file(new_hashes[new_hash])
    p = ThreadPool(20)
    # drain the iterator so download errors surface here instead of being
    # silently discarded with the unconsumed results
    for _ in p.imap_unordered(self._download_db_file,
                              (new_hashes[new_hash] for new_hash in new_to_download)):
        pass
    p.close()
    p.join()
def poolList(self, method, items):
    results = []
    if ENABLE_POOL and not DEBUG:
        pool = ThreadPool(CORES)
        results = pool.imap_unordered(method, items, chunksize=25)
        pool.close()
        pool.join()
    else:
        results = [method(item) for item in items]
    results = filter(None, results)
    return results
def setcaches(urls, section=''):
    def _setcache(url):
        setcache(url, section)

    pool = ThreadPool(8)
    res = pool.imap_unordered(_setcache, urls)
    for _ in res:
        pass
    pool.close()
    pool.join()
def poolList(method, items):
    results = []
    if ENABLE_POOL:
        pool = ThreadPool(cpu_count())
        results = pool.imap_unordered(method, items)
        pool.close()
        pool.join()
    else:
        results = [method(item) for item in items]
    results = filter(None, results)
    return results
def fetch_all_csvs():
    pool = ThreadPool(4)
    results = pool.imap_unordered(fetch_csv, csv_urls)
    # close()/join() waits for all fetches to finish; the buffered
    # results can still be iterated afterwards
    pool.close()
    pool.join()
    strings = [result.decode("utf-8") for result in results]
    csv_file = StringIO(''.join(strings))
    return csv_file
def _get_online_hashes(self):
    cur_year = datetime.datetime.now().year
    hashes = {}

    # enclosed
    def get_online_hash(year):
        r = requests.get(get_db_address(year, 'meta'))
        sha256hash = r.text.split()[-1].split(':')[-1].upper()
        hashes[sha256hash] = get_db_address(year)

    try:
        p = ThreadPool(20)
        # consume the iterator so worker exceptions propagate here; an
        # unconsumed imap_unordered would swallow them
        for _ in p.imap_unordered(get_online_hash,
                                  range(self.first_year, cur_year + 1)):
            pass
        p.close()
        p.join()
    except requests.exceptions.RequestException as e:
        logging.error(str(e))
        return {}  # couldn't retrieve all hashes
    return hashes
def process_downloads(self):
    threads = len(self.url_list)
    pool = ThreadPool(threads)
    results = pool.imap_unordered(self.download_url, self.url_list)
    for index, response in results:
        if response != '':
            self.playlists_all[index].update(response)
        else:
            self.playlists_all[index]['user_info'] = []
    pool.terminate()
    self.buildPlaylistList()
def main(args):  # pragma: no cover
    opts = parse_args(args)
    res = defaultdict(list)
    builders = get_builders()
    workers = ThreadPool(processes=opts.jobs)
    results = workers.imap_unordered(process_entry, builders)
    for result in results:
        res[result[0]].append(result[1])
    print json.dumps(res, sort_keys=True, indent=2, separators=(',', ': '))
def test_magic_find_thread_safe(magic):
    pool = ThreadPool(32)
    m = Magic()
    assert m
    assert magic.check(find_db())
    magic.load(find_db())
    data = (b'\xcf\xfa\xed\xfe\x07\x00\x00\x01\x03\x00\x00\x00\x02\x00\x00\x00'
            b'\x12\x00\x00\x000\x07\x00\x00\x85\x00 \x00\x00\x00\x00\x00\x19')
    for result in pool.imap_unordered(magic.guess_bytes, [data] * 32):
        assert 'Mach-O 64-bit x86_64 executable' in result
    magic.set_flags(mime_type=True)
    for result in pool.imap_unordered(magic.guess_bytes, [data] * 32):
        assert 'application/x-mach-binary' in result
def wrapper(args_list):
    results = []
    pool = ThreadPool()
    with tqdm_redirect_std() as orig_stdout:
        for result in tqdm(pool.imap_unordered(func, args_list),
                           total=len(args_list), file=orig_stdout,
                           dynamic_ncols=True):
            results.append(result)
    pool.close()
    pool.join()
    return results
def main():
    sql = ('select p.id, p.title FROM `resolved_papers` p '
           'inner join `resolved_papers_title` pt on pt.Id = p.Id '
           'WHERE downloaded = 0 and pt.`title_language` = "en" '
           'and p.id >= 38304;')
    papers = pd.read_sql(sql, con=db)
    ids = list(zip(*[papers[c].values.tolist() for c in papers]))
    pool = ThreadPool()
    # consume the iterator and print each result as it completes
    for result in pool.imap_unordered(_download, ids):
        print(result)
    pool.close()
    pool.join()
def process_threaded(img, filters, threadn=8):
    def f(kern):
        return cv2.matchTemplate(img, kern, cv.CV_TM_CCORR_NORMED)

    pool = ThreadPool(processes=threadn)
    accum = None
    for fimg in pool.imap_unordered(f, filters):
        if accum is None:  # '== None' on an ndarray is ambiguous
            accum = np.zeros_like(fimg)
        accum += fimg * fimg
    return accum
def _fit(self, dataset):
    est = self.getOrDefault(self.estimator)
    epm = self.getOrDefault(self.estimatorParamMaps)
    numModels = len(epm)
    eva = self.getOrDefault(self.evaluator)
    nFolds = self.getOrDefault(self.numFolds)
    seed = self.getOrDefault(self.seed)
    h = 1.0 / nFolds
    randCol = self.uid + "_rand"
    df = dataset.select("*", F.rand(seed).alias(randCol))
    metrics = np.zeros((numModels, nFolds))

    pool = ThreadPool(processes=min(self.getParallelism(), numModels))
    subModels = None
    collectSubModelsParam = self.getCollectSubModels()
    if collectSubModelsParam:
        subModels = [[None for j in range(numModels)] for i in range(nFolds)]

    for i in range(nFolds):
        if self.sequentialIndex:
            pass
            # todo pass a column name to base the split on. make sure the
            # split conforms to sklearn norms.
            # idx = [1,2,3,4]
            # training.where(~col("id").isin(idx)).show()
        else:
            validateLB = i * h
            validateUB = (i + 1) * h
            condition = (df[randCol] >= validateLB) & (df[randCol] < validateUB)
            validation = df.filter(condition).cache()
            train = df.filter(~condition).cache()

            tasks = self._parallelFitTasks(est, train, eva, validation, epm,
                                           collectSubModelsParam)
            for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks):
                metrics[j, i] = metric
                if collectSubModelsParam:
                    subModels[i][j] = subModel

            validation.unpersist()
            train.unpersist()

    avgMetrics = np.mean(metrics, axis=1)
    if eva.isLargerBetter():
        bestIndex = np.argmax(avgMetrics)
    else:
        bestIndex = np.argmin(avgMetrics)
    bestModel = est.fit(dataset, epm[bestIndex])
    return self._copyValues(
        CrossValidatorModel(bestModel, avgMetrics.tolist(), subModels)), metrics
def imap_unordered_bar(func, args, n_processes=4):
    p = ThreadPool(n_processes)
    res_list = []
    with tqdm(total=len(args)) as pbar:
        # a second tqdm around the iterator would draw a duplicate bar,
        # so only the outer pbar tracks progress
        for res in p.imap_unordered(func, args):
            pbar.update()
            res_list.append(res)
    p.close()
    p.join()
    return res_list
def thread_pool():
    print("thread pool")
    import threading
    progress_thread = threading.Thread()  # note: no target, so this thread does nothing
    progress_thread.daemon = True
    progress_thread.start()

    pool = ThreadPool(num_threads)
    it = pool.imap_unordered(func, args, chunksize=2)
    for s in it:
        print(s)
def each(coll, iter):
    if not len(coll):
        return [None, None]
    pool = Pool(len(coll))
    try:
        for res in pool.imap_unordered(iter, coll):
            print(res)
    except Exception as e:
        return [None, e]
    return [None, None]
def titles():
    sql = 'select id, title from resolved_papers;'
    papers = pd.read_sql(sql, con=db)
    ids = list(zip(*[papers[c].values.tolist() for c in papers]))
    pool = ThreadPool()
    # consume the iterator and print each result as it completes
    for result in pool.imap_unordered(_titlesLang, ids):
        print(result)
    pool.close()
    pool.join()
def main(url_file, pool_size):
    urls = load_urls(url_file)
    click.echo('URLs to collect:')
    click.echo('\n'.join(urls))
    click.echo('\nDownloading sites')
    pool = Pool(pool_size)
    with click.progressbar(pool.imap_unordered(parse_page, urls),
                           length=len(urls)) as results:
        # with click.progressbar((parse_page(url) for url in urls),
        #                        length=len(urls)) as results:
        result = [r for r in results]
    click.echo('\n'.join(result))
def threaded_proc(vcf_files, cnv_params, reads, study, blood):
    pool = ThreadPool(48)
    moi_data = defaultdict(dict)
    task_list = [(x, cnv_params, str(reads), 'threaded', study, blood)
                 for x in vcf_files]
    try:
        moi_data = {vcf: data
                    for vcf, data in pool.imap_unordered(arg_star, task_list)}
    except Exception:
        pool.terminate()
        raise
    pool.close()
    pool.join()
    return moi_data
def run_farm(self):
    try:
        self._start_sending_feedback()
        input_t = threading.Thread(target=self._sample_data)
        input_t.start()  # start sampling data
        self._progress_logger.start()
        self._db_buffer.start_input_output_cycle()  # start input and output data to/from file
        pool = ThreadPool(processes=self._max_worker)
        # pool.imap_unordered(self._check_whois_with_dns, self._db_buffer, chunksize=1)
        pool.imap_unordered(self._check_whois_with_dns,
                            iter(self.sample_gen, None), chunksize=1)
        while not self._stop_event.is_set() or not self._internal_stop_event.is_set():
            time.sleep(1)
            if self._stop_event.is_set():
                self._internal_stop_event.set()
        input_t.join()
        self._progress_logger.join()
        self._db_buffer.terminate()
        if self._stop_event.is_set():
            self._finished = True
        self._end_sending_feedback()
    except Exception as ex:
        if self._stop_event.is_set():
            self._finished = True
        ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex,
                              "run_farm() index at:" + str(self._job_done))
def _total_samples(self, cls, feature, _ids):
    pool = ThreadPool(cpu_count())
    feature_filter = self.feature_filter

    def x(_id):
        f = feature(_id=_id, persistence=cls)
        filtered = feature_filter(f)
        return len(filtered)

    if self.parallel:
        total_samples = sum(pool.imap_unordered(x, _ids))
    else:
        total_samples = sum(map(x, _ids))
    return total_samples
def derive_stats(args, begin_date, init_stats=None):
    """Process raw CQ updates log and derive stats.

    Fetches raw CQ events and returns the same format as organize_stats().
    If ``init_stats`` are given, preserve the jobs stats and replace the
    other stats.
    """
    stats = init_stats or default_stats()

    filters = ['project=%s' % args.project, 'action=patch_stop']
    end_date = begin_date + datetime.timedelta(minutes=INTERVALS[args.range])
    results = fetch_cq_logs(begin_date, end_date, filters=filters)
    if not results:
        return stats

    stats['begin'] = date_from_timestamp(results[-1]['timestamp'])
    stats['end'] = date_from_timestamp(results[0]['timestamp'])

    raw_patches = set()
    for reason in results:
        raw_patches.add((reason['fields']['issue'], reason['fields']['patchset']))

    patch_stats = {}

    # Fetch and process each patchset log
    def get_patch_stats(patch_id):
        return derive_patch_stats(args, begin_date, end_date, patch_id)

    if args.seq or not args.thread_pool:
        iterable = map(get_patch_stats, raw_patches)
    else:
        pool = ThreadPool(min(args.thread_pool, len(raw_patches)))
        iterable = pool.imap_unordered(get_patch_stats, raw_patches)

    patches, issues = set(), set()
    for patch_id, pstats in iterable:
        if not pstats['supported']:
            continue
        patch_stats[patch_id] = pstats

        issue, patchset = patch_id
        issues.add(issue)
        patches.add((issue, patchset))

    stats['issue-count'] = len(issues)
    stats['patchset-count'] = len(patches)
    stats['patch_stats'] = patch_stats
    _derive_stats_from_patch_stats(stats)

    return stats
def parse_playlists(pl_dict):
    result = {}
    try:
        pool = ThreadPool(PROCESSES)
        pfunc = parse_playlist
        args = pl_dict.keys()
        for i, (uri, streams) in enumerate(pool.imap_unordered(pfunc, args)):
            print "%d/%d" % (i + 1, len(args))
            result[uri] = (pl_dict[uri], streams)
    except Exception as e:
        print e
        return {}
    finally:
        pool.terminate()
        pool.join()
    return result
def fetch_urls(url_iterator, download_root=None, concurrency=2,
               chunk_size=DEFAULT_CHUNK_SIZE):
    pool = ThreadPool(processes=concurrency)
    if download_root is not None:
        iterable = ((i, os.path.join(download_root, j)) for i, j in url_iterator)
    else:
        iterable = ((i, None) for i, j in url_iterator)
    for i in pool.imap_unordered(safe_retrieve_file, iterable):
        if not i:
            continue
        status_code, elapsed_time, url, local_filename = i
        logging.info('HTTP %d (%0.2fs) %s', status_code, elapsed_time, url)
    pool.close()
    pool.join()
def DownloadMode(corpus, request_parallelism):
    """Downloads the URLs for the specified corpus.

    Args:
        corpus: A corpus.
        request_parallelism: The number of concurrent download requests.
    """
    missing_urls = []
    for dataset in datasets:
        print 'Downloading URLs for the %s set:' % dataset

        urls_filename = '%s/wayback_%s_urls.txt' % (corpus, dataset)
        urls = ReadUrls(urls_filename)

        missing_urls_filename = '%s/missing_urls.txt' % corpus
        if os.path.exists(missing_urls_filename):
            print 'Only downloading missing URLs'
            urls = list(set(urls).intersection(ReadUrls(missing_urls_filename)))

        p = ThreadPool(request_parallelism)
        results = p.imap_unordered(DownloadMapper, izip(urls, repeat(corpus)))

        progress_bar = ProgressBar(len(urls))
        collected_urls = []
        try:
            for url, story_html in results:
                if story_html:
                    collected_urls.append(url)
                progress_bar.Increment()
        except KeyboardInterrupt:
            print('Interrupted by user')

        missing_urls.extend(set(urls) - set(collected_urls))

    WriteUrls('%s/missing_urls.txt' % corpus, missing_urls)
    if missing_urls:
        print ('%d URLs couldn\'t be downloaded, see %s/missing_urls.txt.' %
               (len(missing_urls), corpus))
        print 'Try and run the command again to download the missing URLs.'
def main():
    print 'Usage: git log | %s\n\n' % sys.argv[0]
    print 'Counting (press CTRL+C to stop)'
    pool = ThreadPool(250)
    count = 0
    stats = {}
    try:
        for x in pool.imap_unordered(CheckSafe, ExtractCRFromStdin()):
            count += 1
            stats.setdefault(x, 0)
            stats[x] += 1
            stats_str = ''
            for k, v in sorted(stats.items()):
                fmt = '%s: %d (%.2f %%) ' % (k, v, v * 100.0 / count)
                stats_str += '%-25s' % fmt
            print '\r[%d] %s' % (count, stats_str),
            sys.stdout.flush()
    except KeyboardInterrupt:
        pass
    print '\n\n'