def __calc_signals_concurrency(self):
    """
    deprecated: too slow
    :return:
    """
    futures = set()
    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
        for func_name, param_dict in self.__ta_factors:
            try:
                func = self.func_lib[func_name]
            except KeyError as e:
                raise e
            if not param_dict and func_name in inspect.getmembers(
                    lsta, inspect.isfunction):
                param_dict = get_default_args(func)
            if func_name in inspect.getmembers(lsta, inspect.isfunction):
                max_period = num_bars_to_accumulate(func_name=func_name,
                                                    **param_dict)
            else:
                max_period = func.lookback + 1
            if len(self.dataframe) < max_period:
                continue
            else:
                future = executor.submit(func,
                                         self.dataframe.tail(max_period),
                                         **param_dict)
                futures.add(future)
        for future in concurrent.futures.as_completed(futures):
            ret = future.result()
            if len(ret.shape) > 1:
                ret = ret.ix[:, 0]
            ind = self.dataframe.index[-1]
            print len(self.dataframe), func_name, ind, ret[-1]
            self.dataframe.ix[ind, func_name] = ret[-1]

def execute(function, name):
    """ Submit a task to the pool """
    future = executor.submit(function)
    future.name = name
    futures.add(future)

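The `execute` helper above is a closure: `executor` and `futures` are free variables that must exist in an enclosing scope. A minimal sketch of such an enclosing function, assuming tasks arrive as (name, callable) pairs; the name `run_named_tasks` and the result-collection step are illustrative, not from the original source.

import concurrent.futures

def run_named_tasks(tasks):
    """Drive the `execute` helper: `tasks` is an iterable of (name, callable) pairs."""
    futures = set()
    results = {}
    with concurrent.futures.ThreadPoolExecutor() as executor:
        def execute(function, name):
            """ Submit a task to the pool """
            future = executor.submit(function)
            future.name = name      # Future objects accept arbitrary attributes
            futures.add(future)

        for name, function in tasks:
            execute(function, name)
        for future in concurrent.futures.as_completed(futures):
            results[future.name] = future.result()
    return results
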
def _schedule_bears(self, bears):
    """
    Schedules the tasks of bears.

    :param bears:
        A list of bear instances to be scheduled onto the process pool.
    """
    bears_without_tasks = []

    for bear in bears:
        if self.dependency_tracker.get_dependencies(
                bear):  # pragma: no cover
            logging.warning(
                'Dependencies for {!r} not yet resolved, holding back. '
                'This should not happen, the dependency tracking system '
                'should be smarter. Please report this to the developers.'
                .format(bear))
        else:
            futures = set()

            for task in bear.generate_tasks():
                bear_args, bear_kwargs = task

                if self.cache is None:
                    future = self.event_loop.run_in_executor(
                        self.executor, bear.execute_task,
                        bear_args, bear_kwargs)
                else:
                    # Execute the cache lookup in the default
                    # ThreadPoolExecutor, so cache updates reflect properly
                    # in the main process.
                    future = self.event_loop.run_in_executor(
                        None, self._execute_task_with_cache, bear, task)

                futures.add(future)

            self.running_futures[bear] = futures

            # Cleanup bears without tasks after all bears had the chance to
            # schedule their tasks. Not doing so might stop the run too
            # early, as the cleanup is also responsible for stopping the
            # event-loop when no more tasks do exist.
            if not futures:
                logging.debug('{!r} scheduled no tasks.'.format(bear))
                bears_without_tasks.append(bear)
                continue

            for future in futures:
                future.add_done_callback(functools.partial(
                    self._finish_task, bear))

            logging.debug('Scheduled {!r} (tasks: {})'.format(
                bear, len(futures)))

    for bear in bears_without_tasks:
        self._cleanup_bear(bear)

def scale(size, smooth, source, target, concurrency):
    futures = set()
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=concurrency) as executor:
        for sourceImage, targetImage in get_jobs(source, target):
            futures.add(executor.submit(scale_one, size, smooth, sourceImage,
                                        targetImage))
        summary = wait_for(futures)
        if summary.canceled:
            executor.shutdown()
        return summary

def scale(size, smooth, source, target, concurrency):
    futures = set()
    with ProcessPoolExecutor(max_workers=concurrency) as executor:
        for sourceImage, targetImage in get_jobs(source, target):
            future = executor.submit(scale_one, size, smooth, sourceImage,
                                     targetImage)
            futures.add(future)
        summary = wait_for(futures)
        return summary

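Several snippets in this listing call helpers named `get_jobs` and `wait_for` that are defined elsewhere in their respective projects, with slightly different signatures (compare `wait_for(futures)` here with `wait_for(futures, timeout=timeout)` further down). A minimal sketch of a `wait_for` compatible with the `summary.canceled` usage above, assuming a simple `Summary` namedtuple whose `completed`/`canceled` fields are placeholder names, not the original API.

import collections
import concurrent.futures

Summary = collections.namedtuple("Summary", "completed canceled")

def wait_for(futures):
    """Collect results as they complete; cancel outstanding work on Ctrl+C."""
    completed = 0
    canceled = False
    try:
        for future in concurrent.futures.as_completed(futures):
            future.result()   # re-raises any exception from the worker
            completed += 1
    except KeyboardInterrupt:
        canceled = True
        for future in futures:
            future.cancel()
    return Summary(completed, canceled)
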
def pi_async():
    """
    Divide calculation into 4 chunks and create 4 processes to execute each chunk.
    """
    ntasks = 4
    # ntasks = multiprocessing.cpu_count()  # number of (virtual) CPU cores

    # TODO: note the definition of `chunk_size`. This will be the number of
    # calculations performed in each call to sample_multiple()
    # (no code change required)
    chunk_size = total_samples // ntasks  # divide work into 4 chunks

    # TODO: define an empty set of Future instances named `futures`
    futures = set()

    # TODO: write a `with` statement to use a ProcessPoolExecutor.
    # with ThreadPoolExecutor(max_workers=ntasks) as executor:
    with ProcessPoolExecutor() as executor:
        # TODO: set up a `for` loop that executes `ntasks` times.
        for _ in range(ntasks):
            # TODO: for each loop iteration, use a Process to execute
            # sample_multiple with the argument chunk_size.
            # Save the returned Future in a local variable.
            future = executor.submit(sample_multiple, chunk_size)
            # TODO: add the returned Future to the `futures` set.
            futures.add(future)

        # Or, using a list comprehension:
        # futures = [executor.submit(sample_multiple, chunk_size)
        #            for _ in range(ntasks)]

        # TODO: note the definition of `hits`
        # (no code change required)
        hits = 0

        # TODO: set up a `for` loop to get the result of each process as it
        # completes.
        for future in concurrent.futures.as_completed(futures):
            # TODO: add the process's result to `hits`
            hits += future.result()

        # Or, if you prefer the compact generator expression syntax:
        # hits = sum(future.result() for future in
        #            concurrent.futures.as_completed(futures))

    # TODO: note how the value of `hits` is used in the next statement
    # (no code change required)
    pi = 4.0 * hits / total_samples
    return pi

def update_parallel(env, constraint):
    # volfrac = constraint.volume_frac()
    # env.xmin = constraint.density_min()
    # env.xmax = constraint.density_max()
    futures = set()
    data = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
        for idx_col, col_x, col_dc in get_jobs(env):
            future = executor.submit(col_agents_act, idx_col, col_x, col_dc)
            futures.add(future)
        data = wait_for(futures, env)
    return env.combine_data(data)

def _submit_tasks(executor, fn, pickled, tasks):
    """ Submits the function providing the correct amount of arguments """
    futures = set()
    for task in tasks:
        future = None
        # Run the function depending on its required arguments
        if task.args and task.kwargs:
            future = executor.submit(_fn_wrapper, fn, pickled,
                                     *task.args, **task.kwargs)
        elif task.args:
            future = executor.submit(_fn_wrapper, fn, pickled, *task.args)
        else:
            future = executor.submit(_fn_wrapper, fn, pickled)
        futures.add(future)
    return futures

def locked(self) -> int:
    '''How much longer we'll hold the lock (unless we extend or release it).

    If we don't currently hold the lock, then this method returns 0.

    >>> printer_lock_1 = Redlock(key='printer')
    >>> printer_lock_1.locked()
    0
    >>> printer_lock_2 = Redlock(key='printer')
    >>> printer_lock_2.acquire()
    True
    >>> printer_lock_1.locked()
    0
    >>> printer_lock_2.release()

    If we do currently hold the lock, then this method returns the current
    lease's Time To Live (TTL) in ms.

    >>> printer_lock_1.acquire()
    True
    >>> 9 * 1000 < printer_lock_1.locked() < 10 * 1000
    True
    >>> printer_lock_1.release()
    '''
    with ContextTimer() as timer, \
            concurrent.futures.ThreadPoolExecutor() as executor:
        futures = set()
        for master in self.masters:
            future = executor.submit(self.__acquired_master, master)
            futures.add(future)
        ttls = []
        for future in concurrent.futures.as_completed(futures):
            try:
                ttls.append(future.result())
            except RedisError as error:  # pragma: no cover
                _logger.error(error, exc_info=True)
        num_masters_acquired = sum(1 for ttl in ttls if ttl > 0)
        quorum = num_masters_acquired >= len(self.masters) // 2 + 1
        if quorum:
            ttls = sorted(ttls, reverse=True)
            validity_time = ttls[len(self.masters) // 2]
            validity_time -= round(timer.elapsed() + self.__drift())
            return max(validity_time, 0)
        else:
            return 0

def scale(size, source, target, report_progress, state, when_finished):
    futures = set()
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=multiprocessing.cpu_count()) as executor:
        for sourceImage, targetImage in get_jobs(source, target):
            future = executor.submit(scale_one, size, sourceImage,
                                     targetImage, state)
            future.add_done_callback(report_progress)
            futures.add(future)
            if state.value in {CANCELED, TERMINATING}:
                executor.shutdown()
                for future in futures:
                    future.cancel()
                break
        concurrent.futures.wait(futures)  # Keep working until finished
    if state.value != TERMINATING:
        when_finished()

def extend(self) -> None:
    '''Extend our hold on the lock (if we currently hold it).

    Usage:

    >>> printer_lock = Redlock(key='printer')
    >>> printer_lock.acquire()
    True
    >>> 9 * 1000 < printer_lock.locked() < 10 * 1000
    True
    >>> time.sleep(1)
    >>> 8 * 1000 < printer_lock.locked() < 9 * 1000
    True
    >>> printer_lock.extend()
    >>> 9 * 1000 < printer_lock.locked() < 10 * 1000
    True
    >>> printer_lock.release()
    '''
    if self._extension_num >= self.num_extensions:
        raise TooManyExtensions(self.masters, self.key)
    else:
        quorum = False
        with BailOutExecutor() as executor:
            futures = set()
            for master in self.masters:
                future = executor.submit(self.__extend_master, master)
                futures.add(future)

            num_masters_extended = 0
            for future in concurrent.futures.as_completed(futures):
                try:
                    num_masters_extended += future.result()
                except RedisError as error:  # pragma: no cover
                    _logger.exception(
                        '%s.extend() caught an %s',
                        self.__class__.__name__,
                        error.__class__.__name__,
                    )
                else:
                    quorum = num_masters_extended >= len(
                        self.masters) // 2 + 1
                    if quorum:
                        break

        self._extension_num += quorum
        if not quorum:
            raise ExtendUnlockedLock(self.masters, self.key)

def process_items(indir, outdir, report):
    errors = []
    with get_executor() as executor:
        futures = set()
        start = time.time()
        nextreport = start + report
        problems = True
        while problems:
            problems = False
            for f in _files.keys():
                try:
                    unicode(f)
                except UnicodeDecodeError:
                    problems = True
                    errors.append("Skipping %r due to unicode issues" % f)
                    del _files[f]
                    problems = True
                    break
        for f in sorted(_files.keys()):
            task = executor.submit(wrapper, process_item, indir, outdir, f)
            task.filename = f
            futures.add(task)
        for i, task in enumerate(concurrent.futures.as_completed(futures)):
            now = time.time()
            if now >= nextreport:
                nextreport = now + report
                eta = int((len(futures) - i) * (now - start) / (i + 1))
                print i + 1, "of", len(futures), str(int(100 * (i + 1) / len(futures))) + "% ", \
                    "ETA", str(eta) + "s", len(errors), "errors", "\r",
                sys.stdout.flush()
            try:
                res = task.result()
                _files[task.filename].update(res)
            except Exception as e:
                errors.append((task.filename, str(e)))
                if len(errors) > 100:
                    for f in futures:
                        f.cancel()
                    break
        print "Done in", int(time.time() - start), "seconds", " " * 40
    if len(errors):
        errors.sort()
        print "There are", len(errors), "errors"
        for n, s in errors[:50]:
            print n, s
        sys.exit(1)

def test_remove_and_contains(self):
    futures = exfoliate.Futures()
    future_1 = concurrent.futures.Future()
    future_2 = concurrent.futures.Future()
    future_2.set_result(1)
    futures.add(future_1)
    futures.add(future_2)
    assert len(futures) == 2
    assert future_1 in futures
    assert future_2 in futures
    futures.remove(future_1)
    assert len(futures) == 1
    assert future_2 in futures
    assert future_1 not in futures
    with pytest.raises(KeyError):
        futures.remove(future_1)

def canta(concorrenza, parola, listaFile):
    if concorrenza is True:
        futures = set()
        with concurrent.futures.ProcessPoolExecutor(
                max_workers=len(listaFile)) as executor:
            for parola, nomeFile in get_jobs(parola, listaFile):
                future = executor.submit(cerca, parola, nomeFile)
                futures.add(future)
            wait_for(futures, parola, listaFile)
    else:
        print("NO CONCORRENZA")
        exit(1)

def _current_id(self, value):
    futures, num_masters_set = set(), 0
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for master in self.masters:
            future = executor.submit(
                self._set_id_script,
                keys=(self.key,),
                args=(value,),
                client=master,
            )
            futures.add(future)
        for future in concurrent.futures.as_completed(futures):
            with contextlib.suppress(TimeoutError, ConnectionError):
                num_masters_set += future.result() == value
    if num_masters_set < len(self.masters) // 2 + 1:
        raise QuorumNotAchieved(self.masters, self.key)

def run_text(path):
    # Append a 16-digit random number to the captcha URL
    futures = set()
    company_list = read_company2(path)
    with concurrent.futures.ProcessPoolExecutor(2) as executor:
        for company in company_list:
            future = executor.submit(run, company)
            futures.add(future)
        try:
            # as_completed() returns an iterator that yields futures as they
            # complete. Duplicates in the input are yielded only once, and any
            # futures that finished before the call are yielded first.
            for future in concurrent.futures.as_completed(futures):
                err = future.exception()
                if err is not None:
                    raise err
        except KeyboardInterrupt:
            print("stopped by hand")

def locked(self):
    '''How much longer we'll hold the lock (unless we extend or release it).

    If we don't currently hold the lock, then this method returns 0.

    >>> printer_lock_1 = Redlock(key='printer')
    >>> printer_lock_1.locked()
    0
    >>> printer_lock_2 = Redlock(key='printer')
    >>> printer_lock_2.acquire()
    True
    >>> printer_lock_1.locked()
    0
    >>> printer_lock_2.release()

    If we do currently hold the lock, then this method returns the current
    lease's Time To Live (TTL) in ms.

    >>> printer_lock_1.acquire()
    True
    >>> 9 * 1000 < printer_lock_1.locked() < 10 * 1000
    True
    >>> printer_lock_1.release()
    '''
    futures, num_masters_acquired, ttls = set(), 0, []
    with ContextTimer() as timer, \
            concurrent.futures.ThreadPoolExecutor(
                max_workers=len(self.masters),
            ) as executor:
        for master in self.masters:
            futures.add(executor.submit(self._acquired_master, master))
        for future in concurrent.futures.as_completed(futures):
            with contextlib.suppress(TimeoutError, ConnectionError):
                ttl = future.result()
                num_masters_acquired += ttl > 0
                ttls.append(ttl)
        quorum = num_masters_acquired >= len(self.masters) // 2 + 1
        if quorum:
            ttls = sorted(ttls, reverse=True)
            validity_time = ttls[len(self.masters) // 2]
            validity_time -= timer.elapsed() + self._drift()
            return max(validity_time, 0)
        else:
            return 0

def scale(size, source, target, report_progress, state, when_finished):
    futures = set()
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=multiprocessing.cpu_count()) as executor:
        for sourceImage, targetImage in get_jobs(source, target):
            future = executor.submit(scale_one, size, sourceImage,
                                     targetImage, state)
            future.add_done_callback(report_progress)
            futures.add(future)
            if state.value in {CANCELED, TERMINATING}:
                for future in futures:
                    future.cancel()
                executor.shutdown()
                break
        concurrent.futures.wait(futures)  # Keep working until finished
    if state.value != TERMINATING:
        when_finished()

def main():
    limit, concurrency = handle_commandline()
    Qtrac.report("starting...")
    filename = os.path.join(os.path.dirname(__file__), "whatsnew.dat")
    futures = set()
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=concurrency) as executor:
        for feed in Feed.iter(filename):
            future = executor.submit(Feed.read, feed, limit)
            futures.add(future)
        done, filename, canceled = process(futures)
        if canceled:
            executor.shutdown()
    Qtrac.report("read {}/{} feeds using {} threads{}".format(
        done, len(futures), concurrency, " [canceled]" if canceled else ""))
    print()
    if not canceled:
        webbrowser.open(filename)

def main():
    src_dir = './pics/'
    des_dir = './skins/'
    filename_list = os.listdir(src_dir)
    futures = set()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for filename in filename_list:
            future = executor.submit(worker, filename, src_dir, des_dir)
            futures.add(future)
        try:
            for future in concurrent.futures.as_completed(futures):
                err = future.exception()
                if err is not None:
                    raise err
        except KeyboardInterrupt:
            print("stopped by hand")

def main():
    limit, concurrency = handle_commandline()
    Qtrac.report("starting...")
    filename = os.path.join(os.path.dirname(__file__), "whatsnew.dat")
    futures = set()
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=concurrency) as executor:
        for feed in Feed.iter(filename):
            future = executor.submit(Feed.read, feed, limit)
            futures.add(future)
        done, filename, canceled = process(futures)
        if canceled:
            executor.shutdown()
    Qtrac.report("read {}/{} feeds using {} processes{}".format(
        done, len(futures), concurrency, " [canceled]" if canceled else ""))
    print()
    if not canceled:
        webbrowser.open(filename)

def main(args: Args) -> None:
    """
    Main program. Reads files from disk and optionally sends each file
    to Discovery.
    """
    discovery = DiscoveryV1(
        args.version,
        url=args.url,
        username=args.username,
        password=args.password,
        iam_apikey=args.iam_api_key,
    )
    args.environment_id = writable_environment_id(discovery)
    collections = discovery.list_collections(
        args.environment_id).get_result()["collections"]
    if len(collections) == 1:
        args.collection_id = collections[0]["collection_id"]

    if not args.collection_id:
        if collections:
            print(
                "Error: multiple collections found. Please specify which one to use."
            )
        else:
            print(
                "Error: no target collection found. Please create a collection."
            )
        exit(1)

    target = Target(discovery, args.environment_id, args.collection_id)
    index_list = existing_sha1s(target)
    work_q: queue.Queue = queue.Queue(CONCURRENCY)
    with concurrent.futures.ThreadPoolExecutor(max_workers=CONCURRENCY + 1) as executor:
        executor.submit(walk_paths, args.paths, index_list, work_q, args.dry_run)
        futures = set()
        item = work_q.get()
        while item:
            futures.add(executor.submit(send_file, target, *item))
            while len(futures) >= CONCURRENCY:
                # We're at our desired concurrency, wait for something to complete.
                _, futures = concurrent.futures.wait(
                    futures, return_when=concurrent.futures.FIRST_COMPLETED)
            item = work_q.get()

def scale(size, smooth, source, target, concurrency):
    # set() is an unordered, non-sliceable container (unlike a tuple)
    futures = set()
    # a process pool executor is used for CPU-intensive work
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=concurrency) as executor:  # make processes!
        # get_jobs returns a generator of image path pairs
        for sourceImage, targetImage in get_jobs(source, target):
            # submit(fn, *args, **kwargs) returns a Future instance:
            # future.running(), future.done()
            future = executor.submit(scale_one, size, smooth, sourceImage,
                                     targetImage)
            # add the Future to the futures set; the task runs once the pool
            # has a free worker
            futures.add(future)
        # concurrent.futures.wait() returns a tuple (set(completed),
        # set(uncompleted)); wait_for here is this module's own helper
        summary = wait_for(futures)
        # without this check, the executor would only be shut down by the
        # enclosing "with" block
        if summary.canceled:
            executor.shutdown()  # shut down all worker processes
        return summary

def task_runner(func, values, index=0, quantity=None, max_workers=16,
                chunk_size=100, *args, **kwargs):
    """ This slices an array as needed, chunks it up, and launches tasks """
    quant = index + quantity if quantity else None
    _index = index
    vals = values[index:quant]
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = set()
        for items in chunked(vals, chunk_size):
            kwargs['index'] = _index
            futures.add(executor.submit(func, items, *args, **kwargs))
            _index += len(items)
        for fut in concurrent.futures.as_completed(futures):
            try:
                fut.result()
            except Exception as exc:
                raise
        index += len(items)

def exec_stream(callable, iterable, sync=lambda _: False, capacity=10, rate=10):
    """ Executes a stream according to a defined rate limit. """
    limiter = Limiter(capacity, rate, MemoryStorage())
    futures = set()

    def execute(operation):
        return (operation, callable(operation))

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=capacity) as executor:
        while True:
            if not limiter.consume("stream", 1):
                start = int(time.time())
                done, pending = concurrent.futures.wait(
                    futures, return_when=concurrent.futures.FIRST_COMPLETED)
                for future in done:
                    yield future.result()
                futures = pending
                if (int(time.time()) - start) < 1:
                    # guarantee there's capacity in the rate limit at the
                    # end of the loop
                    time.sleep(1.0 / rate)
            operation = next(iterable, None)
            if not operation:
                done, _ = concurrent.futures.wait(futures)
                for future in done:
                    yield future.result()
                break
            if sync(operation):
                yield execute(operation)
                continue
            futures.add(executor.submit(execute, operation))

def main():
    data_dir = sys.argv[1]
    output_dir = sys.argv[2]
    scan_depth = int(sys.argv[3])
    try:
        os.makedirs(output_dir)
        output_dir = os.path.abspath(output_dir)
    except OSError as e:
        if e.errno != 17:
            print('Error: cannot create output path "%s": %s.' % (output_dir, str(e)))
            return 1
        else:
            output_dir = os.path.abspath(output_dir)
    try:
        os.chdir(data_dir)
    except Exception as e:
        print('Error: cannot chdir to path "%s": %s.' % (data_dir, str(e)))
        return 1
    traverse_logdir('.', scan_depth)
    try:
        os.chdir(output_dir)
    except Exception as e:
        print('Error: cannot chdir to path "%s": %s. Use pwd ("%s") instead.'
              % (output_dir, str(e), os.getcwd()))
    with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_WORKERS * 2) as executor:
        futures = set()
        for col in (evecollections, netstatcollections, sysstatcollections,
                    psstatcollections):
            for name, collection in col.items():
                futures.add(executor.submit(collection.to_xlsx))
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(Colors.RED + 'Error: %s' % e + Colors.ENDC)
                print(traceback.format_exc())
    os.system("grep -r 'Sample size' | sort | tee 'sample_size.txt'")

def _readPackages(self, onReady):
    wrongCpu = 'i386' if sys.maxsize > 2 ** 32 else 'amd64'
    packageFilenames = []
    descFilenames = []
    for name in glob.iglob(f'{DATA_DIR}/*'):
        if wrongCpu not in name and fnmatch.fnmatch(name, PACKAGE_PATTERN):
            packageFilenames.append(name)
        elif fnmatch.fnmatch(name, DESC_PATTERN):
            descFilenames.append(name)
    onReady('Reading Packages files…', False)
    descForName = {}
    allDebs = []
    try:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            futures = set()
            for filename in packageFilenames:
                futures.add(executor.submit(self._readPackageFile, filename))
            for filename in descFilenames:
                futures.add(executor.submit(self._readDescFile, filename))
            for future in concurrent.futures.as_completed(futures):
                kind, data = future.result()
                if kind is FutureKind.DEBS:
                    allDebs += data
                elif kind is FutureKind.DESCS:
                    descForName.update(data)
        seen = set()
        for deb in allDebs:
            if deb.name in seen:
                continue  # Some debs appear in > 1 Packages files
            seen.add(deb.name)
            desc = descForName.get(deb.name)
            if desc is not None:
                deb = deb._replace(desc=desc)
            self._debForName[deb.name] = deb
        onReady(f'Read {len(self._debForName):,d} packages from '
                f'{len(packageFilenames):,d} Packages files in '
                f'{time.monotonic() - self.timer:0.1f}sec…', False)
    except OSError as err:
        print(err)

def __threads(n, func, *nargs):
    futures = set()
    results = set()
    with ThreadPoolExecutor(max_workers=n) as T:
        [futures.add(T.submit(func, arg)) for arg in nargs]
        for future in concurrent.futures.as_completed(futures):
            err = future.exception()
            if err is not None:
                cprint(err, "red")
            results.add(future.result())
    return results

def _acquire_masters(self):
    self._value = os.urandom(self.num_random_bytes)
    self._extension_num = 0
    futures, num_masters_acquired = set(), 0
    with ContextTimer() as timer, \
            concurrent.futures.ThreadPoolExecutor() as executor:
        for master in self.masters:
            futures.add(executor.submit(self._acquire_master, master))
        for future in concurrent.futures.as_completed(futures):
            with contextlib.suppress(TimeoutError, ConnectionError):
                num_masters_acquired += future.result()
        quorum = num_masters_acquired >= len(self.masters) // 2 + 1
        elapsed = timer.elapsed() - self._drift()
        validity_time = self.auto_release_time - elapsed
    if quorum and max(validity_time, 0):
        return True
    else:
        with contextlib.suppress(ReleaseUnlockedLock):
            self.release()
        return False

def main():
    limit, concurrency = handle_commandline()
    Qtrac.report("starting...")
    filename = os.path.join(os.path.dirname(__file__), "whatsnew.dat")
    futures = set()  # set of Future instances
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=concurrency) as executor:
        for feed in Feed.iter(filename):  # a generator of Feed(title, url)
            # Feed.read is the callable; feed and limit are its arguments
            future = executor.submit(Feed.read, feed, limit)
            futures.add(future)  # add the Future instance to the futures set
        done, filename, canceled = process(futures)
        if canceled:
            executor.shutdown()
    Qtrac.report("read {}/{} feeds using {} threads{}".format(
        done, len(futures), concurrency, " [canceled]" if canceled else ""))
    print()
    if not canceled:
        webbrowser.open(filename)

def release(self) -> None:
    '''Unlock the lock.

    Usage:

    >>> printer_lock = Redlock(key='printer')
    >>> bool(printer_lock.locked())
    False
    >>> printer_lock.acquire()
    True
    >>> bool(printer_lock.locked())
    True
    >>> printer_lock.release()
    >>> bool(printer_lock.locked())
    False
    '''
    quorum = False
    with BailOutExecutor() as executor:
        futures = set()
        for master in self.masters:
            future = executor.submit(self.__release_master, master)
            futures.add(future)

        num_masters_released = 0
        for future in concurrent.futures.as_completed(futures):
            try:
                num_masters_released += future.result()
            except RedisError as error:  # pragma: no cover
                _logger.exception(
                    '%s.release() caught an %s',
                    self.__class__.__name__,
                    error.__class__.__name__,
                )
            else:
                quorum = num_masters_released >= len(self.masters) // 2 + 1
                if quorum:
                    break
    if not quorum:
        raise ReleaseUnlockedLock(self.masters, self.key)

def init():
    global agent_logged, processing_pool
    say_hello()
    if not pg.connect():
        kill_me(1)
    log("✓ Connected.", "DEBUG")
    if not pg.can_do_work():
        log("Another agent is running already. Goodbye.", "NOTICE")
        kill_me(0)
    agent_logged = pg.add_agent()
    if not agent_logged:
        kill_me(1)
    max_threads = config["maxThreads"]
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {executor.submit(do_work)}
        while futures:
            done, futures = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED)
            for fut in done:
                work_done(fut)
                del fut
            if len(processing_pool) > 0:
                for i in range(
                        0, min(max_threads - len(futures),
                               processable_model_count())):
                    executr = executor.submit(do_work)
                    futures.add(executr)
                    log("Adding another thread. " + str(executr), "DEBUG")
    log("No more processing to do. Goodbye.", "NOTICE")
    kill_me(0)

def main():
    limit, concurrency = handle_commandline()
    print("Starting...")
    filename = os.path.join(os.path.dirname(__file__), "whatsnew.dat")
    futures = set()
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=concurrency) as executor:
        # TODO Fix me!
        # BEGIN Write correct implementation here
        for feed_source in feed.iter(filename):
            future = executor.submit(feed.read, feed_source, limit)
            futures.add(future)
        done, filename, canceled = process(futures)
        if canceled:
            executor.shutdown()
        # END
    print("Read {}/{} feeds using {} threads{}".format(
        done, len(futures), concurrency, " [canceled]" if canceled else ""))
    print()
    if not canceled:
        webbrowser.open(filename)

def test_len(self):
    futures = exfoliate.Futures()
    future_1 = concurrent.futures.Future()
    future_2 = concurrent.futures.Future()
    futures.add(future_1)
    assert len(futures) == 1
    futures.add(future_1)
    assert len(futures) == 1
    futures.add(future_2)
    assert len(futures) == 2

def thread_this(fn, vars=None, args=None,
                max_threads=multiprocessing.cpu_count(), timeout=None):
    """
    :type args: dict
    :param args: dictionary with keyword arguments to pass to fn. The key is
        the corresponding var
    :param fn: function object
    :param vars: list of positional arguments to pass to fn
    :param max_threads:
    :param timeout:
    :return:
    """
    logger.info(" starting multithreading: pool of {}".format(max_threads))
    futures = set()
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        if vars and args:
            for var in vars:
                future = executor.submit(fn, var, **args[var])
                futures.add(future)
            canceled, results = wait_for(futures, timeout=timeout)
            if canceled:
                logger.info("Cancelled...")
                executor.shutdown()
        elif vars:
            for var in vars:
                future = executor.submit(fn, var)
                futures.add(future)
            canceled, results = wait_for(futures, timeout=timeout)
            if canceled:
                logger.info("Cancelled...")
                executor.shutdown()
        elif args:
            for arg in args:
                future = executor.submit(fn, arg, **args[arg])
                futures.add(future)
            canceled, results = wait_for(futures, timeout=timeout)
            if canceled:
                logger.info("Cancelled...")
                executor.shutdown()
        else:
            raise ValueError("Either args or vars (or both) must be specified")
    logger.info(" ran {} devices using {} threads{}".format(
        len(futures), max_threads, " [canceled]" if canceled else ""))
    return results

def test_iter(self):
    futures = exfoliate.Futures()
    future_1 = concurrent.futures.Future()
    future_2 = concurrent.futures.Future()
    future_3 = concurrent.futures.Future()
    future_1.set_result(None)
    futures.add(future_1)
    futures.add(future_2)
    future_2.set_result(None)
    futures.add(future_3)
    expected_futures = set((future_1, future_2,))
    for i, future in enumerate(futures):
        if i == 0:
            assert future in expected_futures
            expected_futures.remove(future)
        if i == 1:
            assert future in expected_futures
            future_3.set_result(None)
        if i == 2:
            assert future == future_3

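The `exfoliate.Futures` container exercised by the three tests in this listing is not shown here. A minimal sketch that would satisfy the behaviour those tests rely on (set semantics for add/remove/contains/len, and iteration yielding futures in completion order); the real class may well differ.

import concurrent.futures

class Futures:
    """Set-like container of futures; iteration yields them as they complete."""

    def __init__(self):
        self._futures = set()

    def add(self, future):
        self._futures.add(future)

    def remove(self, future):
        self._futures.remove(future)   # raises KeyError if the future is absent

    def __contains__(self, future):
        return future in self._futures

    def __len__(self):
        return len(self._futures)

    def __iter__(self):
        # Yield futures in completion order, like concurrent.futures.as_completed.
        return concurrent.futures.as_completed(self._futures)
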
def queue_item():
    try:
        item = next(items)
    except StopIteration:
        return
    futures.add(executor.submit(func, *item))

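`queue_item` above refers to `items`, `futures`, `executor` and `func` from an enclosing scope; it is the classic building block of a bounded-submission loop that keeps a fixed number of tasks in flight. A minimal sketch of how such a closure is typically driven, assuming each item is a tuple of positional arguments for `func`; the names `run_bounded` and `max_pending` are placeholders, not from the original source.

import concurrent.futures

def run_bounded(func, iterable, max_pending=8):
    items = iter(iterable)
    futures = set()
    with concurrent.futures.ThreadPoolExecutor() as executor:

        def queue_item():
            try:
                item = next(items)
            except StopIteration:
                return
            futures.add(executor.submit(func, *item))

        # Prime the pool, then top it up each time a future finishes,
        # so at most max_pending tasks are in flight at once.
        for _ in range(max_pending):
            queue_item()
        while futures:
            done, futures = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED)
            for future in done:
                future.result()
                queue_item()

# Example: run_bounded(print, [(1,), (2,), (3,)])
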
def processAlgorithm(self, parameters, context, feedback):
    """
    Here is where the processing itself takes place.
    """
    # Retrieve the feature source and sink. The 'dest_id' variable is used
    # to uniquely identify the feature sink, and must be included in the
    # dictionary returned by the processAlgorithm function.
    inputLyr = self.parameterAsVectorLayer(parameters, self.INPUT, context)
    if inputLyr is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.INPUT))
    onlySelected = self.parameterAsBool(parameters, self.SELECTED, context)
    featCount = inputLyr.featureCount() if not onlySelected \
        else inputLyr.selectedFeatureCount()
    features = inputLyr.getFeatures() if not onlySelected \
        else inputLyr.getSelectedFeatures()
    # Compute the number of steps to display within the progress bar and
    # get features from source
    self.total = 100.0 / featCount if featCount else 0
    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                           inputLyr.fields(),
                                           inputLyr.wkbType(),
                                           inputLyr.sourceCrs())
    vector_utils = VectorUtils()
    inputPolygonsLyrList = self.parameterAsLayerList(
        parameters, self.INPUT_POLYGONS, context)
    if inputPolygonsLyrList == []:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.INPUT_POLYGONS))
    num_workers = self.parameterAsInt(parameters, self.NUM_CPU, context)
    num_feats_per_worker = self.parameterAsInt(parameters, self.NUM_FEATS,
                                               context)

    def compute(feature):
        geom = feature.geometry()
        request = QgsFeatureRequest()
        request.setFilterRect(geom.boundingBox())
        # for lyr in inputPolygonsLyrList:
        featList = map(lambda x: x.getFeatures(request), inputPolygonsLyrList)
        for feat in itertools.chain.from_iterable(featList):
            if feedback is not None and feedback.isCanceled():
                return
            if geom.intersects(feat.geometry()):
                sink.addFeature(feature, QgsFeatureSink.FastInsert)
                break
        self.current_feat += 1
        if feedback is not None:
            feedback.setProgress(int(self.current_feat * self.total))
        return

    self.current_feat = 0
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {
            executor.submit(compute, task): task
            for task in itertools.islice(features, num_feats_per_worker)
        }
        while futures:
            if feedback is not None and feedback.isCanceled():
                break
            # Wait for the next future to complete.
            done, futures = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED)
            # Schedule the next set of futures. We don't want more than N
            # futures in the pool at a time, to keep memory consumption down.
            for task in itertools.islice(features, len(done)):
                futures.add(executor.submit(compute, task))
        done, futures = concurrent.futures.wait(
            futures, return_when=concurrent.futures.ALL_COMPLETED)
    return {self.OUTPUT: dest_id}

def _recursive_download(base_url, download_directory=".", username=None,
                        password=None, include=None, exclude=None,
                        skip_existing=True, download_jobs=10,
                        download_retries=3, crawler_args=None):
    """Concurrent recursive downloader using the itsybitsy crawler

    Arguments
    ---------
    base_url : str
        Starting point for crawler
    download_directory : str
        Directory to save files to (default: current directory)
    username : str
        Username required for authentication (default: no authentication)
    password : str
        Password required for authentication (default: no authentication)
    include : list of str or str
        Download only files matching at least one of those glob patterns
        (default: download all files)
    exclude : list of str or str
        Do not download files matching at least one of those glob patterns
        (default: download all files)
    skip_existing : bool
        Skip existing files
    download_jobs : int
        Number of concurrent jobs used for downloading files
    download_retries : int
        Number of retries
    crawler_args : dict
        Keyword arguments to pass to itsybitsy.crawl
    """
    logger.debug("crawling %s", base_url)

    if crawler_args is None:
        crawler_args = {}
    if username and password:
        crawler_args["auth"] = (username, password)

    crawler = itsybitsy.crawl(base_url, **crawler_args)
    base_url_normalized = next(crawler)
    base_path = urlparse.urlparse(base_url_normalized).path

    if isinstance(include, str):
        include = [include]
    if isinstance(exclude, str):
        exclude = [exclude]

    with requests.Session() as session:
        session.auth = (username, password)
        futures = set()
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=download_jobs) as executor:
            for url in crawler:
                logger.debug("> found link: %s" % url)
                url_parts = urlparse.urlparse(url)
                file_path = url_parts.path
                if not file_path.startswith(base_path):
                    warnings.warn(
                        "File {} does not match base path {} - skipping".
                        format(file_path, base_path))
                    continue
                target_localpath = os.path.normpath(file_path[len(base_path):])
                target_fname = os.path.basename(target_localpath)
                target_fullpath = os.path.join(download_directory, target_fname)
                if (include and not any(
                        fnmatch.fnmatch(target_localpath, pattern)
                        for pattern in include)):
                    logger.debug(">> skipping due to include pattern")
                    continue
                if (exclude and any(
                        fnmatch.fnmatch(target_localpath, pattern)
                        for pattern in exclude)):
                    logger.debug(">> skipping due to exclude pattern")
                    continue
                logger.debug(">> downloading")
                args = (url, target_fullpath, session, download_retries,
                        skip_existing)
                future = executor.submit(_download_file, *args)
                futures.add(future)
            if futures:
                for future in concurrent.futures.as_completed(futures):
                    yield future.result()

def threadify(function, items, max_threads=10, throw_exceptions=False,
              arg_items=False):
    """
    Threadpool helper. Automagically multi-threadifies a function with some
    items. Handles generators correctly by only submitting max_threads * 2
    items to the threadpool at a time. Returns an iterator that produces
    (item, result) tuples in real-time. By default exceptions are returned
    in the results instead of thrown. See throw_exceptions.

    :param function: Function to execute on each item. Called like
        function(item) by default. See arg_items for an alternative.
    :param items: Iterable (or generator) of items to submit to the threadpool
    :param max_threads: Maximum number of threads to run at a time
    :param throw_exceptions: Throw exceptions instead of returning them.
        Exception.item is set to the original item.
    :param arg_items: Each item is an iterable of positional arguments or a
        dict of keyword arguments for the function. Function calls become
        function(*item) or function(**item) if the item is a dict.
    :return: Generator producing iter((item, result)...)
    """
    import concurrent.futures

    thread_pool = concurrent.futures.ThreadPoolExecutor(
        max_workers=max_threads)
    futures = set()

    # There are certain generators, like range(), that are iterable but not
    # iterators and produce repeating, never-depleting lists of numbers for
    # some reason. This fixes that problem.
    items = iter(items)

    # since there's no way to get the original item from a future we have to
    # use this helper.
    #
    # this also handles exceptions. we can't use future.exception() since we'd
    # have no way to associate items with their exceptions. this makes handling
    # results from threadify a bit more annoying but meh... I can't really
    # think of a better option.
    def thread_helper(item):
        try:
            if arg_items:
                if isinstance(item, dict):
                    result = function(**item)
                elif is_iterable(item):
                    result = function(*item)
                else:
                    raise RuntimeError(
                        'arg_items is set but item is not an iterable or dict')
            else:
                result = function(item)
            return item, result
        except Exception as exception:
            return item, exception

    running = True
    while running or futures:
        # submit to threadpool
        # only submits max_threads * 2 at a time, in case items is a big generator
        for item in items:
            future = thread_pool.submit(thread_helper, item)
            futures.add(future)
            if len(futures) > max_threads * 2:
                break
        else:
            running = False

        # now we wait for some futures to complete
        # in order to provide results to the caller in realtime we use FIRST_COMPLETED
        done, futures = concurrent.futures.wait(
            futures, return_when=concurrent.futures.FIRST_COMPLETED)

        for future in done:
            exception = future.exception()
            if exception:
                # we should hopefully never reach this
                raise exception
            item, result = future.result()
            if throw_exceptions and isinstance(result, Exception):
                result.item = item
                raise result
            else:
                yield item, result
