def file_as_training_vectors(filename, gram_size):
    worker_count = 8
    with ProcessPoolExecutor(max_workers=worker_count) as exe:
        itr = enumerate(file_as_posts(filename))
        futures = []
        for i in range(worker_count):
            try:
                idx, post = next(itr)
                futures.append(exe.submit(post_to_vector, post, gram_size))
            except StopIteration:
                break  # we've submitted all the tasks already!
        while True:
            done, not_done = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED)
            futures = []
            for i in done:
                yield from i.result()
                try:
                    idx, post = next(itr)
                    futures.append(exe.submit(post_to_vector, post, gram_size))
                except StopIteration:
                    # just wait it out
                    pass
            futures.extend(not_done)
            if len(futures) == 0:
                return
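
# A minimal, standalone sketch (not from the original source) of the same
# bounded-submission pattern used above: keep at most `worker_count` futures
# in flight, yield results as they complete, and back-fill each freed slot.
# The `square` task and the range input are hypothetical stand-ins for
# post_to_vector and file_as_posts.
import concurrent.futures
import itertools
from concurrent.futures import ProcessPoolExecutor


def square(x):
    return x * x


def stream_results(items, worker_count=4):
    with ProcessPoolExecutor(max_workers=worker_count) as exe:
        itr = iter(items)
        # seed the pool with at most worker_count tasks
        futures = [exe.submit(square, x)
                   for x in itertools.islice(itr, worker_count)]
        while futures:
            done, not_done = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED)
            futures = list(not_done)
            for fut in done:
                yield fut.result()
                try:
                    # back-fill the slot that just freed up
                    futures.append(exe.submit(square, next(itr)))
                except StopIteration:
                    pass


if __name__ == "__main__":
    # completion order is not guaranteed, so sort for display
    print(sorted(stream_results(range(10))))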
def check_open_orders(self):
    """
    Loop through the list of open orders in the Portfolio object.
    For each executed order found, create a transaction and apply to the
    Portfolio.

    Returns
    -------
    list[Transaction]
    """
    futures = []
    for asset in self.open_orders:
        exchange = self.exchanges[asset.exchange]
        futures.extend([
            self.event_loop.run_in_executor(
                self.executor,
                self.get_transactions_for_order,
                asset.symbol,
                exchange,
                order,
            ) for order in self.open_orders[asset]
        ])
    async_results = asyncio.gather(*futures, loop=self.event_loop)
    return [
        txn for txn in self.event_loop.run_until_complete(async_results)
        if txn is not None
    ]
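
# Note: `asyncio.gather(*futures, loop=...)` above relies on the explicit
# `loop` argument, which was deprecated in Python 3.8 and removed in 3.10.
# Below is a standalone sketch of the equivalent fan-out on current Python;
# the `fetch_transaction` worker and its order ids are hypothetical.
import asyncio
from concurrent.futures import ThreadPoolExecutor


def fetch_transaction(order_id):
    # stand-in for a blocking exchange API call
    return {"order": order_id, "filled": True}


async def gather_transactions(order_ids):
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor() as executor:
        futures = [
            loop.run_in_executor(executor, fetch_transaction, order_id)
            for order_id in order_ids
        ]
        results = await asyncio.gather(*futures)
    return [txn for txn in results if txn is not None]


if __name__ == "__main__":
    print(asyncio.run(gather_transactions([1, 2, 3])))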
async def execute_async(start_time):
    loop = asyncio.get_event_loop()
    import concurrent.futures
    import os
    print(os.cpu_count())
    executor = concurrent.futures.ThreadPoolExecutor()
    print(executor._max_workers)
    print()
    futures = [
        loop.run_in_executor(
            None,
            http_request,
            start_time,
            'http://example.org/'
        )
        for i in range(3)
    ]
    futures.extend([
        loop.run_in_executor(
            None,
            execute_sql,
            start_time,
            "select * from users where name like :string ;",
            {'string': "%ck"}
        )
        for i in range(3)
    ])
    await asyncio.gather(*futures)
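
# A standalone sketch of how a coroutine like the one above is typically
# driven. `slow_io` is a hypothetical stand-in for the blocking http_request
# and execute_sql helpers, which are not shown in the snippet; the shape is
# the same: fan blocking calls out to the default executor and await them
# together from asyncio.run().
import asyncio
import time


def slow_io(start_time, label):
    time.sleep(0.1)  # blocking work, e.g. an HTTP request or SQL query
    return f"{label} finished {time.time() - start_time:.2f}s after start"


async def execute_all(start_time):
    loop = asyncio.get_running_loop()
    futures = [
        loop.run_in_executor(None, slow_io, start_time, f"http-{i}")
        for i in range(3)
    ]
    futures.extend(
        loop.run_in_executor(None, slow_io, start_time, f"sql-{i}")
        for i in range(3)
    )
    return await asyncio.gather(*futures)


if __name__ == "__main__":
    for line in asyncio.run(execute_all(time.time())):
        print(line)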
def main():
    args = parse_args()

    logger = logging.getLogger(config.LOGGER_NAME)
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    logger.debug("VERBOSE MODE ON")

    if args.output is not None:
        logger.debug(f"Saving log to: {args.output}")
        logger.addHandler(logging.FileHandler(args.output, encoding="utf-8", mode="w"))

    today = datetime.utcnow()
    start_date = (today - timedelta(days=args.max_days - 1)).date()
    end_date = today.date()

    logger.debug(f"[*] Query: {args.query}")
    logger.debug(f"[*] Date range: {start_date}~{end_date}")
    logger.debug(f"[*] Crawling targets: {', '.join(args.targets)}")
    logger.debug("[*] Running crawlers...")

    pool = concurrent.futures.ThreadPoolExecutor()
    futures = []
    for q in args.query:
        crawling_targets = [target2crawler[target.lower()]() for target in args.targets]
        futures.extend(
            [
                pool.submit(
                    lambda query, start_date, end_date, analyse, main_columns_only: c.run(
                        query=query,
                        start_date=start_date,
                        end_date=end_date,
                        analyse=analyse,
                        main_columns_only=main_columns_only,
                    ),
                    q,
                    start_date,
                    end_date,
                    args.analyse,
                    not args.all_columns,
                )
                for c in crawling_targets
            ]
        )

    try:
        for completed in concurrent.futures.as_completed(futures):
            print(f"[+] Done - {completed.result()}")
    except KeyboardInterrupt:
        # https://gist.github.com/clchiou/f2608cbe54403edb0b13
        pool._threads.clear()
        concurrent.futures.thread._threads_queues.clear()
        raise

    pool.shutdown()
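
# One thing worth noting about the submit-inside-a-comprehension pattern
# above: the lambda refers to the loop variable `c`, and Python closures
# capture such variables by reference, not by value. If a submitted lambda
# only starts running after the comprehension has moved on, it can end up
# calling run() on a different crawler than intended. Below is a minimal
# standalone sketch of the usual safeguard, binding the current object as a
# default argument; the Crawler class and its inputs are hypothetical.
from concurrent.futures import ThreadPoolExecutor


class Crawler:
    def __init__(self, name):
        self.name = name

    def run(self, query):
        return f"{self.name}: {query}"


if __name__ == "__main__":
    crawlers = [Crawler("news"), Crawler("blogs"), Crawler("forums")]
    with ThreadPoolExecutor() as pool:
        futures = [
            # `crawler=c` freezes the current crawler for this lambda
            pool.submit(lambda query, crawler=c: crawler.run(query), "python")
            for c in crawlers
        ]
        for fut in futures:
            print(fut.result())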
def getnewdata(self):
    """Get latest data from TaDo server and add to dataframe.

    Uses multithreaded pool to get data simultaneously.
    """
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=len(self.zones) + 1) as executor:
        # Load the operations then wait for them all to complete
        futures = [executor.submit(self.getweatherdata)]
        futures.extend([
            executor.submit(self.getzonedata, zone) for zone in self.zones
        ])
        futures, _ = concurrent.futures.wait(futures, timeout=30)
        return [future.result() for future in futures]
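
# concurrent.futures.wait() returns a (done, not_done) pair; the method above
# keeps only the `done` set, so calls that miss the 30 s timeout are silently
# dropped rather than blocking in result(). A small standalone illustration
# of that behaviour (the sleep durations are made up):
import concurrent.futures
import time


def fetch(delay):
    time.sleep(delay)
    return delay


if __name__ == "__main__":
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(fetch, d) for d in (0.1, 0.2, 5.0)]
        done, not_done = concurrent.futures.wait(futures, timeout=1)
        print("finished:", sorted(f.result() for f in done))
        print("still running:", len(not_done))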
async def fuzz_domains(self, domains):
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=self.threads) as executor:
        _loop = asyncio.get_event_loop()
        futures = []
        for domain in domains:
            ext = tldextract.extract(domain)
            if ext.subdomain:
                level = ext.subdomain.count('.') + 1
            else:
                level = 0
            if level > int(self.max_level):
                _debug_('{} level is more than max_level({})'
                        .format(domain, self.max_level))
                continue
            if level == 0:
                futures.extend([
                    _loop.run_in_executor(
                        executor, self.query_domain_arecord,
                        '.'.join([sub, domain]))
                    for sub in self.subs
                ])
            else:
                futures.extend([
                    _loop.run_in_executor(
                        executor, self.query_domain_arecord,
                        '.'.join([nxt, domain]))
                    for nxt in self.nexts
                ])
        sub_results = await asyncio.gather(*futures)
        sub_results = list(filter(lambda _: len(_[1]) != 0, sub_results))
        sub_domains = dict(sub_results).keys()
        futures = [
            _loop.run_in_executor(
                executor, self.analysis_wirdcard_domain, sub_domain)
            for sub_domain in sub_domains
        ]
        sub_wd_results = await asyncio.gather(*futures)
        sub_wd_results = list(filter(lambda _: not _[1], sub_wd_results))
        return dict(sub_results), dict(sub_wd_results)
    return futures


def do_work(dummy_file, work_input):
    print("{}: {}".format(dummy_file, work_input))
    print("{}: Doing work {}...".format(dummy_file, work_input))
    print('process id:', os.getpid())
    time.sleep(1)
    return work_input * work_input


if __name__ == '__main__':
    find_work_pool = ThreadPoolExecutor(max_workers=3)
    # do_work_pool = []
    # for ele in range(0, 3):
    #     do_work_pool.append(ProcessPoolExecutor(max_workers=2))
    dummy_files = [1, 2, 3]
    futures = []
    for dummy_file in dummy_files:
        futures.extend(
            find_work_pool.submit(find_work_inputs, dummy_file).result())
        # concurrent.futures.wait(futures)
    for dummy_file, work_input, future in futures:
        print("Result from file:{} input:{} is {}".format(
            dummy_file, work_input, sum(future)))
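
# The snippet above collects (file, input, future) triples from a first-stage
# discovery step and drains them in a second loop. Below is a compact
# standalone sketch of that two-stage fan-out, with hypothetical
# `find_inputs` / `do_work` tasks standing in for the originals.
from concurrent.futures import ThreadPoolExecutor
import time


def do_work(work_input):
    time.sleep(0.1)
    return work_input * work_input


def find_inputs(pool, dummy_file):
    # first stage: discover the inputs for one file and submit the
    # second-stage work for each of them
    inputs = [dummy_file * 10 + i for i in range(3)]
    return [(dummy_file, x, pool.submit(do_work, x)) for x in inputs]


if __name__ == "__main__":
    with ThreadPoolExecutor(max_workers=3) as pool:
        futures = []
        for dummy_file in (1, 2, 3):
            futures.extend(find_inputs(pool, dummy_file))
        for dummy_file, work_input, future in futures:
            print(f"file {dummy_file}, input {work_input} -> {future.result()}")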
    celery.cache_policy_requests()
    celery.cache_credential_authorization_mapping.apply_async(countdown=180)
else:
    celery.cache_cloud_account_mapping()
    accounts_d = async_to_sync(get_account_id_to_name_mapping)(force_sync=True)
    default_celery_tasks.cache_application_information()
    executor = ThreadPoolExecutor(max_workers=os.cpu_count())
    futures = []
    for account_id in accounts_d.keys():
        futures.extend(
            [
                executor.submit(celery.cache_iam_resources_for_account, account_id),
                executor.submit(celery.cache_s3_buckets_for_account, account_id),
                executor.submit(celery.cache_sns_topics_for_account, account_id),
                executor.submit(celery.cache_sqs_queues_for_account, account_id),
                executor.submit(celery.cache_managed_policies_for_account, account_id),
                executor.submit(
                    celery.cache_resources_from_aws_config_for_account, account_id
                ),
            ]
        )
    for future in concurrent.futures.as_completed(futures):
        try:
            data = future.result()
        except Exception as exc:
            print("%r generated an exception: %s" % (future, exc))
    celery.cache_policies_table_details()
    celery.cache_policy_requests()
    celery.cache_credential_authorization_mapping()
    # Forces writing config to S3
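
# The fan-out above submits several independent cache tasks per account and
# then handles each result (or exception) as it completes. A standalone
# sketch of that shape, with a hypothetical `cache_resource` task standing in
# for the per-account celery helpers:
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor


def cache_resource(account_id, resource):
    if resource == "sqs" and account_id == "222":
        raise RuntimeError("simulated API failure")
    return f"cached {resource} for account {account_id}"


if __name__ == "__main__":
    accounts = ["111", "222"]
    resources = ["iam", "s3", "sns", "sqs"]
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = []
        for account_id in accounts:
            futures.extend(
                executor.submit(cache_resource, account_id, r) for r in resources
            )
        for future in concurrent.futures.as_completed(futures):
            try:
                print(future.result())
            except Exception as exc:
                print("%r generated an exception: %s" % (future, exc))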
def main(time_limit, make_releases, main_only, debug, allow_unsafe):
    """Worker process for continuously building repodata for a maximum
    number of TIME_LIMIT seconds.
    """
    global DEBUG
    DEBUG = debug
    start_time = time.time()

    # refresh at the start
    refresh_github_token_and_client()

    with timer(HEAD, "pulling repos"):
        os.makedirs(WORKDIR, exist_ok=True)
        if not os.path.exists("repodata-shards"):
            _clone_repodata_shards()
        if not os.path.exists(REPODATA_NAME):
            _clone_repodata()
        if not os.path.exists("conda-forge-repodata-patches-feedstock"):
            _clone_and_init_repodata_patches()

    with timer(HEAD, "loading local data"):
        all_repodata, all_links = _load_current_data(make_releases, allow_unsafe)
        all_channeldata = {}
        all_patched_repodata = {}

    while time.time() - start_time < time_limit:
        __dt = time.time() - start_time
        print("===================================================", flush=True)
        print("===================================================", flush=True)
        print(
            "used %ds of %ds total - %ds remaining" % (
                __dt, time_limit, time_limit - __dt
            ),
            flush=True,
        )
        print("===================================================", flush=True)
        print("===================================================", flush=True)

        build_start_time = time.time()

        refresh_github_token_and_client()

        with timer(HEAD, "doing repodata products rebuild"), ThreadPoolExecutor(max_workers=8) as exec:  # noqa
            old_sha, new_sha, new_shards, removed_shards = _get_new_shards(
                all_links["current-shas"].get("repodata-shards-sha", None)
            )

            # TODO force repatch if local data is inconsistent
            old_patch_sha, new_patch_sha, patch_fns = _update_and_reimport_patch_fns(
                all_links["current-shas"].get("repodata-patches-sha", None)
            )
            repatch_all_pkgs = old_patch_sha != new_patch_sha

            utcnow = datetime.now().astimezone(pytz.UTC)
            updated_data = set()

            if (
                make_releases and
                # None is a full rebuild, otherwise len > 0 means we have new
                # ones to add
                # we have to make a release if we need to repatch everything
                # as well
                (
                    new_shards is None
                    or len(new_shards) > 0
                    or removed_shards is None
                    or len(removed_shards) > 0
                    or repatch_all_pkgs
                )
            ):
                tag = utcnow.strftime("%Y.%m.%d.%H.%M.%S")
                rel = get_repodata().create_git_tag_and_release(
                    tag,
                    "",
                    tag,
                    "",
                    _get_repodata_sha(),
                    "commit",
                    draft=True,
                )
                futures = []
            else:
                # do this to catch errors
                futures = None
                rel = None

            for subdir in CONDA_FORGE_SUBIDRS:
                try:
                    _rebuild_subdir(
                        subdir=subdir,
                        new_shards=new_shards,
                        removed_shards=removed_shards,
                        repatch_all_pkgs=repatch_all_pkgs,
                        all_repodata=all_repodata,
                        all_patched_repodata=all_patched_repodata,
                        all_links=all_links,
                        updated_data=updated_data,
                        make_releases=make_releases,
                        main_only=main_only,
                        patch_fns=patch_fns,
                        futures=futures,
                        rel=rel,
                        exec=exec,
                    )
                except Exception:
                    if rel is not None and futures is not None:
                        for fn in list(all_links["serverdata"]):
                            if f"_{subdir}" in fn:
                                del all_links["serverdata"][fn]

                        # rebuild it all if we error
                        _rebuild_subdir(
                            subdir=subdir,
                            new_shards=None,
                            removed_shards=None,
                            repatch_all_pkgs=True,
                            all_repodata=all_repodata,
                            all_patched_repodata=all_patched_repodata,
                            all_links=all_links,
                            updated_data=updated_data,
                            make_releases=make_releases,
                            main_only=main_only,
                            patch_fns=patch_fns,
                            futures=futures,
                            rel=rel,
                            exec=exec,
                        )

            all_links["current-shas"]["repodata-shards-sha"] = new_sha
            all_links["current-shas"]["repodata-patches-sha"] = new_patch_sha

            if updated_data and make_releases:
                with timer(HEAD, "(re)building channel data"):
                    futures.extend(_build_channel_data(
                        all_channeldata,
                        all_links,
                        all_patched_repodata,
                        all_links["labels"],
                        updated_data,
                        rel,
                        exec,
                        make_releases=make_releases,
                        main_only=main_only,
                    ))

            if updated_data and make_releases:
                with timer(HEAD, "waiting for repo/channel data uploads to finish"):
                    for fut in concurrent.futures.as_completed(futures):
                        fname, url = fut.result()
                        if fname not in all_links["serverdata"]:
                            all_links["serverdata"][fname] = []
                        all_links["serverdata"][fname].append(url)
                        if len(all_links["serverdata"][fname]) > 3:
                            all_links["serverdata"][fname] = \
                                all_links["serverdata"][fname][-3:]
                    futures = []

                with timer(HEAD, "writing and uploading links"):
                    all_links["updated_at"] = utcnow.strftime("%Y-%m-%d %H:%M:%S %Z%z")
                    futures.extend(
                        _write_compress_and_start_upload(
                            all_links,
                            "links.json",
                            rel,
                            exec,
                            only_compress=True,
                        )
                    )
                    concurrent.futures.wait(futures)

                with timer(HEAD, "publishing release", result=False):
                    rel.update_release(rel.title, rel.body, draft=False)

        if make_releases:
            with timer(HEAD, "deleting old releases"):
                tags = delete_old_repodata_releases(all_links)
                for tag in tags:
                    print(f"{HEAD}deleted release {tag}", flush=True)

        dt = int(time.time() - build_start_time)
        if dt < MIN_UPDATE_TIME:
            print(
                "REPO WORKER: waiting for %s seconds before "
                "next update" % (MIN_UPDATE_TIME - dt),
                flush=True,
            )
            time.sleep(MIN_UPDATE_TIME - dt)

    if DEBUG:
        with timer(HEAD, "dumping all data to JSON"):
            with open(f"{WORKDIR}/all_repodata.json", "w") as fp:
                json.dump(all_repodata, fp, indent=2, sort_keys=True)
            with open(f"{WORKDIR}/all_patched_repodata.json", "w") as fp:
                json.dump(all_patched_repodata, fp, indent=2, sort_keys=True)
            with open(f"{WORKDIR}/all_links.json", "w") as fp:
                json.dump(all_links, fp, indent=2, sort_keys=True)
            with open(f"{WORKDIR}/all_channeldata.json", "w") as fp:
                json.dump(all_channeldata, fp, indent=2, sort_keys=True)
def _rebuild_subdir(
    *,
    subdir,
    new_shards,
    removed_shards,
    repatch_all_pkgs,
    all_repodata,
    all_patched_repodata,
    all_links,
    updated_data,
    make_releases,
    main_only,
    patch_fns,
    futures,
    rel,
    exec,
):
    if new_shards is not None:
        new_subdir_shards = [
            k for k in new_shards
            if k.startswith(f"repodata-shards/shards/{subdir}/")
        ]
    else:
        # this is a sentinel that indicates a full rebuild
        new_subdir_shards = None

    if removed_shards is not None:
        removed_subdir_shards = [
            k for k in removed_shards
            if k.startswith(f"repodata-shards/shards/{subdir}/")
        ]
    else:
        # this is a sentinel that indicates a full rebuild
        removed_subdir_shards = None

    if subdir not in all_repodata:
        all_repodata[subdir] = {}
    if subdir not in all_patched_repodata:
        all_patched_repodata[subdir] = {}

    subdir_updated_data = set()

    with timer(HEAD, "processing shards for subdir %s" % subdir):
        if (
            new_subdir_shards is None
            or len(new_subdir_shards) > 0
            or removed_subdir_shards is None
            or len(removed_subdir_shards) > 0
        ):
            with timer(HEAD, "making repodata", indent=1):
                subdir_updated_data = _update_repodata_from_shards(
                    all_repodata,
                    all_links,
                    new_subdir_shards,
                    removed_subdir_shards,
                    subdir,
                )
                updated_data |= subdir_updated_data

                all_labels = set(all_links["labels"])
                all_labels |= set(
                    [label for label in all_patched_repodata[subdir]])
                all_labels |= set(
                    [label for label in all_repodata[subdir]])
                all_links["labels"] = sorted(all_labels)

        if make_releases and (subdir_updated_data or repatch_all_pkgs):
            with timer(HEAD, "patching and writing repodata", indent=1):
                for label in all_links["labels"]:
                    if (
                        (subdir, label) not in updated_data
                        and not repatch_all_pkgs
                    ):
                        continue
                    if main_only and label != "main":
                        continue

                    if label not in all_repodata[subdir]:
                        all_repodata[subdir][label] = \
                            _fetch_repodata(all_links, subdir, label)

                    if label not in all_patched_repodata[subdir]:
                        all_patched_repodata[subdir][label] = \
                            _fetch_patched_repodata(
                                all_links, subdir, label
                            )

                    if label == "broken":
                        all_patched_repodata[subdir][label] = copy.deepcopy(
                            all_repodata[subdir][label]
                        )
                    else:
                        _patch_repodata(
                            all_repodata[subdir][label],
                            all_patched_repodata[subdir][label],
                            subdir,
                            patch_fns,
                            do_all=repatch_all_pkgs,
                        )

                    futures.extend(_write_compress_and_start_upload(
                        all_patched_repodata[subdir][label],
                        f"repodata_{subdir}_{label}.json",
                        rel,
                        exec,
                    ))

            with timer(
                HEAD, "building and writing current repodata", indent=1
            ):
                for label in all_links["labels"]:
                    if (
                        (subdir, label) not in updated_data
                        and not repatch_all_pkgs
                    ):
                        continue
                    if main_only and label != "main":
                        continue

                    if label not in all_patched_repodata[subdir]:
                        with timer(
                            HEAD,
                            f"fetching patched repodata for "
                            f"{label}/{subdir}",
                            indent=3,
                        ):
                            all_patched_repodata[subdir][label] = \
                                _fetch_patched_repodata(
                                    all_links, subdir, label
                                )

                    crd = build_current_repodata(
                        subdir,
                        all_patched_repodata[subdir][label],
                    )

                    futures.extend(_write_compress_and_start_upload(
                        crd,
                        f"current_repodata_{subdir}_{label}.json",
                        rel,
                        exec,
                    ))

            with timer(
                HEAD, "writing repodata from packages", indent=1
            ):
                for label in all_links["labels"]:
                    if (subdir, label) not in updated_data:
                        continue
                    if main_only and label != "main":
                        continue

                    futures.extend(_write_compress_and_start_upload(
                        all_repodata[subdir][label],
                        f"repodata_from_packages_{subdir}_{label}.json",
                        rel,
                        exec,
                    ))
def main():
    args = parse_args()
    logging.basicConfig(level=getattr(logging, args.log_level.upper()))

    # Load list of profiles from file.
    #
    # If file does not exist or list is empty, the profile list is initialized
    # with the default profile directory path.
    df_profiles = load_profiles_info(args.profiles_path)
    drop_version_errors(df_profiles, missing=False, mismatch=True,
                        inplace=True)

    # Save most recent list of profiles to disk.
    with args.profiles_path.open('w') as output:
        profiles_str = yaml.dump(
            df_profiles[SAVED_COLUMNS].astype(str).to_dict('records'),
            default_flow_style=False)
        output.write(profiles_str)

    # Look up major version of each profile.
    df_profiles['major_version'] = df_profiles.path.map(profile_major_version)

    # Perform the following tasks in the background:
    #
    #  - Upgrade `microdrop-launcher` package
    #  - Cache latest `microdrop` package version
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        def _auto_upgrade():
            process = sp.Popen(
                [sys.executable, '-m', 'microdrop_launcher.auto_upgrade'])
            return process.communicate()

        def _cache_latest_microdrop_version():
            process = sp.Popen(
                [sys.executable, '-m', 'microdrop_launcher.microdrop_version'])
            return process.communicate()

        def _launch(args, df_profiles):
            if args.default or (not args.no_auto and df_profiles.shape[0] == 1):
                # Launch MicroDrop with most recently used (or only available)
                # profile.
                return_code = launch_profile_row(df_profiles.iloc[0])
                if return_code == 0:
                    df_profiles.used_timestamp[0] = str(dt.datetime.now())
            else:
                # Display dialog to manage profiles or launch a profile.
                launch_dialog = LaunchDialog(df_profiles)
                launch_dialog.run()
                return_code = launch_dialog.return_code
                df_profiles = launch_dialog.df_profiles

            # Save most recent list of profiles to disk (most recently used
            # first).
            #
            # List can be changed using dialog by:
            #  - Creating a new profile.
            #  - Importing a profile.
            #  - Updating used timestamp by launching a profile.
            df_profiles = df_profiles.astype(str)
            df_profiles.loc[df_profiles.used_timestamp == 'nan',
                            'used_timestamp'] = ''
            df_profiles.sort_values('used_timestamp', ascending=False,
                                    inplace=True)
            with args.profiles_path.open('w') as output:
                profiles_str = yaml.dump(
                    df_profiles[SAVED_COLUMNS].to_dict('records'),
                    default_flow_style=False)
                output.write(profiles_str)
            return return_code

        futures = []
        if not args.no_upgrade:
            upgrade_future = executor.submit(_auto_upgrade)
            microdrop_version_future = \
                executor.submit(_cache_latest_microdrop_version)
            futures.extend([upgrade_future, microdrop_version_future])
        launch_future = executor.submit(_launch, args, df_profiles)
        futures.append(launch_future)
        concurrent.futures.wait(futures)
        return launch_future.result()