def run(log_level='INFO', parallel=0):
    logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=log_level, format=logformat)
    logger = logging.getLogger(__name__)
    print(PROGNAME + ' v' + VERSION + ' started.')
    feeds = []
    if parallel <= 1:
        for feeditem in CONFIG.cp.sections():
            feed = Feed(feeditem)
            feed.parse_feed()
            feeds.append(feed)
        for feed in feeds:
            if len(feed.entries) > 0:
                logger.info('Storing ' + str(len(feed.entries)) + ' items'
                            + ' from ' + feed.name + ' on ' + feed.imap.host + '.')
                for entry in feed.entries:
                    feed.imap.store_entry(feed, entry)
                feed.new_to_cache()
    else:
        logger.warning('Using ' + str(parallel) + ' connections for '
                       + 'fetching and storing. This can cause issues on '
                       + 'some IMAP servers. Use at your own risk!')
        future_data = []
        with ThreadPoolExecutor(max_workers=parallel) as executor:
            for feeditem in CONFIG.cp.sections():
                feed = Feed(feeditem)
                feeds.append(feed)
            future_data = {executor.submit(parallel_parse, feed): feed
                           for feed in feeds}
            # Drain as_completed() so all parse jobs finish before storing.
            for feed in as_completed(future_data):
                pass
        for feed in feeds:
            with ThreadPoolExecutor(max_workers=parallel) as executor:
                future_data = {executor.submit(feed.imap.store_entry, feed, entry): entry
                               for entry in feed.entries}
                for nothing in as_completed(future_data):
                    pass
def process(self, asgs):
    original_count = len(asgs)
    asgs = [a for a in asgs if a['SuspendedProcesses']]
    self.delay = self.data.get('delay', 30)
    self.log.debug("Filtered from %d to %d suspended asgs" % (
        original_count, len(asgs)))

    with self.executor_factory(max_workers=3) as w:
        futures = {}
        for a in asgs:
            futures[w.submit(self.resume_asg_instances, a)] = a
        for f in as_completed(futures):
            if f.exception():
                log.error("Traceback resume asg:%s instances error:%s" % (
                    futures[f]['AutoScalingGroupName'], f.exception()))
                continue

    log.debug("Sleeping for asg health check grace")
    time.sleep(self.delay)

    with self.executor_factory(max_workers=3) as w:
        futures = {}
        for a in asgs:
            futures[w.submit(self.resume_asg, a)] = a
        for f in as_completed(futures):
            if f.exception():
                log.error("Traceback resume asg:%s error:%s" % (
                    futures[f]['AutoScalingGroupName'], f.exception()))
def create_tasks(taskgraph, label_to_taskid): # TODO: use the taskGroupId of the decision task task_group_id = slugid() taskid_to_label = {t: l for l, t in label_to_taskid.iteritems()} session = requests.Session() # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase # that limit. Connections are established as needed, so using a large value # should not negatively impact performance. http_adapter = requests.adapters.HTTPAdapter(pool_connections=CONCURRENCY, pool_maxsize=CONCURRENCY) session.mount('https://', http_adapter) session.mount('http://', http_adapter) decision_task_id = os.environ.get('TASK_ID') with futures.ThreadPoolExecutor(CONCURRENCY) as e: fs = {} # We can't submit a task until its dependencies have been submitted. # So our strategy is to walk the graph and submit tasks once all # their dependencies have been submitted. # # Using visit_postorder() here isn't the most efficient: we'll # block waiting for dependencies of task N to submit even though # dependencies for task N+1 may be finished. If we need to optimize # this further, we can build a graph of task dependencies and walk # that. for task_id in taskgraph.graph.visit_postorder(): task_def = taskgraph.tasks[task_id].task attributes = taskgraph.tasks[task_id].attributes # if this task has no dependencies, make it depend on this decision # task so that it does not start immediately; and so that if this loop # fails halfway through, none of the already-created tasks run. if decision_task_id and not task_def.get('dependencies'): task_def['dependencies'] = [decision_task_id] task_def['taskGroupId'] = task_group_id task_def['schedulerId'] = '-' # Wait for dependencies before submitting this. deps_fs = [fs[dep] for dep in task_def.get('dependencies', []) if dep in fs] for f in futures.as_completed(deps_fs): f.result() fs[task_id] = e.submit(_create_task, session, task_id, taskid_to_label[task_id], task_def) # Schedule tasks as many times as task_duplicates indicates for i in range(1, attributes.get('task_duplicates', 1)): # We use slugid() since we want a distinct task id fs[task_id] = e.submit(_create_task, session, slugid(), taskid_to_label[task_id], task_def) # Wait for all futures to complete. for f in futures.as_completed(fs.values()): f.result()
def test_pure(client):
    N = 10
    with client.get_executor() as e:
        fs = [e.submit(get_random) for i in range(N)]
        res = [fut.result() for fut in as_completed(fs)]
        assert len(set(res)) < len(res)
    with client.get_executor(pure=False) as e:
        fs = [e.submit(get_random) for i in range(N)]
        res = [fut.result() for fut in as_completed(fs)]
        assert len(set(res)) == len(res)
def joined_map_seq(futures):
    """Wait for all futures to finish, then yield their results in submission order.

    :param futures: iterable of Future objects
    :return: generator of results
    """
    # as_completed() returns a lazy iterator; it has to be consumed to actually
    # block until every future has finished.
    for _ in as_completed(futures):
        pass
    for future in futures:
        res = future.result()
        yield res
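# Hypothetical usage sketch for joined_map_seq above (the pool, worker and values
# are illustrative assumptions, not taken from the original source).
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=4) as ex:
    futs = [ex.submit(pow, n, 2) for n in range(5)]
    # Results come back in submission order, even though completion order may differ.
    print(list(joined_map_seq(futs)))  # [0, 1, 4, 9, 16]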
def get_elb_bucket_locations(self):
    session = local_session(self.manager.session_factory)
    client = session.client('elb')

    # Try to use the cache if it exists
    elbs = self.manager._cache.get(
        {'region': self.manager.config.region, 'resource': 'elb'})

    # Sigh, post query refactor reuse, we can't save our cache here
    # as that resource manager does extra lookups on tags. Not
    # worth paginating, since with cache usage we have full set in
    # mem.
    if elbs is None:
        p = client.get_paginator('describe_load_balancers')
        results = p.paginate()
        elbs = results.build_full_result().get(
            'LoadBalancerDescriptions', ())
        self.log.info("Queried %d elbs", len(elbs))
    else:
        self.log.info("Using %d cached elbs", len(elbs))

    get_elb_attrs = functools.partial(
        _query_elb_attrs, self.manager.session_factory)

    with self.executor_factory(max_workers=2) as w:
        futures = []
        for elb_set in chunks(elbs, 100):
            futures.append(w.submit(get_elb_attrs, elb_set))
        for f in as_completed(futures):
            if f.exception():
                log.error("Error while scanning elb log targets: %s" % (
                    f.exception()))
                continue
            for tgt in f.result():
                yield tgt
def _process_bucket(self, b, p, key_log, w):
    count = 0

    for key_set in p:
        keys = self.get_keys(b, key_set)
        count += len(keys)
        futures = []

        for batch in chunks(keys, size=100):
            if not batch:
                continue
            futures.append(w.submit(self.process_chunk, batch, b))

        for f in as_completed(futures):
            if f.exception():
                log.exception("Exception Processing bucket:%s key batch %s" % (
                    b['Name'], f.exception()))
                continue
            r = f.result()
            if r:
                key_log.add(r)

        # Log completion at info level, progress at debug level
        if key_set['IsTruncated']:
            log.debug(
                'Scan progress bucket:%s keys:%d remediated:%d ...',
                b['Name'], count, key_log.count)
        else:
            log.info('Scan Complete bucket:%s keys:%d remediated:%d',
                     b['Name'], count, key_log.count)

    b['KeyScanCount'] = count
    b['KeyRemediated'] = key_log.count
    return {
        'Bucket': b['Name'], 'Remediated': key_log.count, 'Count': count}
def create(self):
    results = {}
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        plugin_futures = dict((executor.submit(p.collect, self.timeframe), p)
                              for p in self.plugins)
        for future in futures.as_completed(plugin_futures):
            plugin = plugin_futures[future]
            try:
                if rapport.config.get_int("rapport", "verbosity") >= 2:
                    print "Result for {0}: {1}".format(plugin.alias, future.result())
                template = rapport.template.get_template(plugin, "text")
                if template:
                    results[plugin] = template.render(future.result())
            except Exception as e:
                print >>sys.stderr, "Failed plugin {0}:{1}: {2}!".format(plugin, plugin.alias, e)

    # TODO: Generate mail template

    # Print results sorted by plugin appearance in config file (i.e. init order):
    for plugin in self.plugins:
        try:
            print results[plugin]
        except KeyError as e:
            # A missing result for a plugin means an exception happened
            # above, which already printed an error message, thus:
            pass
def add_tags_to_results(self, client, elbs):
    """
    Gets the tags for the ELBs and adds them to the result set.
    """
    elb_names = [elb['LoadBalancerName'] for elb in elbs]
    names_to_tags = {}
    fn = partial(self.process_tags, client=client)

    futures = []
    with self.executor_factory(max_workers=3) as w:
        # max 20 ELBs per call (API limitation)
        for elb_names_chunk in chunks(elb_names, size=20):
            futures.append(
                w.submit(fn, elb_names_chunk))

        for f in as_completed(futures):
            if f.exception():
                self.log.exception("Exception Processing ELB: %s" % (
                    f.exception()))
                continue
            r = f.result()
            if r:
                names_to_tags.update(r)

    for elb in elbs:
        elb['Tags'] = names_to_tags[elb['LoadBalancerName']]
def process(self, volumes):
    original_count = len(volumes)
    volumes = [v for v in volumes
               if not v['Encrypted'] or not v['Attachments']]
    log.debug(
        "EncryptVolumes filtered from %d to %d "
        " unencrypted attached volumes" % (
            original_count, len(volumes)))

    # Group volumes by instance id
    instance_vol_map = {}
    for v in volumes:
        instance_id = v['Attachments'][0]['InstanceId']
        instance_vol_map.setdefault(instance_id, []).append(v)

    # Query instances to find current instance state
    self.instance_map = {
        i['InstanceId']: i for i in query_instances(
            local_session(self.manager.session_factory),
            InstanceIds=instance_vol_map.keys())}

    with self.executor_factory(max_workers=10) as w:
        futures = {}
        for instance_id, vol_set in instance_vol_map.items():
            futures[w.submit(
                self.process_volume, instance_id, vol_set)] = instance_id

        for f in as_completed(futures):
            if f.exception():
                instance_id = futures[f]
                log.error(
                    "Exception processing instance:%s volset: %s \n %s" % (
                        instance_id, instance_vol_map[instance_id],
                        f.exception()))
def process(self, buckets):
    results = []
    with self.executor_factory(max_workers=3) as w:
        futures = {}
        for b in buckets:
            futures[w.submit(self.process_bucket, b)] = b
        for f in as_completed(futures):
            if f.exception():
                # Report the bucket associated with the failed future,
                # not whichever bucket the submit loop ended on.
                b = futures[f]
                self.log.error(
                    "Error on bucket:%s region:%s policy:%s error: %s",
                    b['Name'], b.get('Location', 'unknown'),
                    self.manager.data.get('name'), f.exception())
                self.denied_buckets.append(b['Name'])
                continue
            result = f.result()
            if result:
                results.append(result)

    if self.denied_buckets and self.manager.log_dir:
        with open(
                os.path.join(
                    self.manager.log_dir, 'denied.json'), 'w') as fh:
            json.dump(self.denied_buckets, fh, indent=2)
        self.denied_buckets = []
    return results
def _process_identify_futures(self, futures, opts, instances):
    self.out.debug('scan._process_identify_futures')
    checkpoint = datetime.now()
    i = 0
    to_scan = {}
    cancelled = False
    for future in as_completed(futures):
        if common.shutdown:
            if not cancelled:
                map(lambda x: x.cancel(), futures)
                cancelled = True
            continue

        url = future.url
        try:
            cms_name, result_tuple = future.result(timeout=opts['timeout_host'])
            if cms_name != None:
                if cms_name not in to_scan:
                    to_scan[cms_name] = []
                to_scan[cms_name].append(result_tuple)
        except:
            f.exc_handle(url, self.out, self.app.testing)

        i += 1

    if to_scan:
        self._process_scan(opts, instances, to_scan)
        to_scan = {}
def start_upload(self):
    """Method to start upload"""
    LOGGER.warning("Starting to upload %d sequences...", len(self.sequences))
    user = self.login_controller.login()
    with THREAD_LOCK:
        total = 0
        for sequence in self.sequences:
            total = total + len(sequence.visual_items)
        self.progress_bar = tqdm(total=total)
    sequence_operation = SequenceUploadOperation(self,
                                                 user.access_token,
                                                 self.max_workers)
    with ThreadPoolExecutor(max_workers=1) as executors:
        futures = [executors.submit(sequence_operation.upload, sequence)
                   for sequence in self.sequences]
        report = []
        for future in as_completed(futures):
            success, sequence = future.result()
            report.append((success, sequence))
            if success:
                LOGGER.warning(" Uploaded sequence from %s, "
                               "the sequence will be available after "
                               "processing at %s",
                               sequence.path,
                               self.login_controller.osc_api.sequence_link(sequence))
            else:
                LOGGER.warning(" Failed to upload sequence at %s. Restart the script in "
                               "order to finish your upload for this sequence.",
                               sequence.path)
    LOGGER.warning("Finished uploading")
    self.progress_bar.close()
def resources(self, query=None):
    client = local_session(self.manager.session_factory).client('config')
    paginator = client.get_paginator('list_discovered_resources')
    pages = paginator.paginate(
        resourceType=self.manager.get_model().config_type)
    results = []

    with self.manager.executor_factory(max_workers=5) as w:
        ridents = pager(pages, self.retry)
        resource_ids = [
            r['resourceId'] for r in ridents.get('resourceIdentifiers', ())]
        self.manager.log.debug(
            "querying %d %s resources",
            len(resource_ids),
            self.manager.__class__.__name__.lower())

        futures = []
        for resource_set in chunks(resource_ids, 50):
            futures.append(w.submit(self.get_resources, resource_set))
        for f in as_completed(futures):
            if f.exception():
                self.manager.log.error(
                    "Exception getting resources from config \n %s" % (
                        f.exception()))
                continue
            results.extend(f.result())

    return results
def testWaitingForSomeButNotAllConcurrentFutureInvocations(self):
    pool = logging_pool.pool(test_constants.THREAD_CONCURRENCY)
    request = b'\x67\x68'
    expected_response = self._handler.handle_unary_unary(request, None)
    response_futures = [None] * test_constants.THREAD_CONCURRENCY
    lock = threading.Lock()
    test_is_running_cell = [True]

    def wrap_future(future):
        def wrap():
            try:
                return future.result()
            except grpc.RpcError:
                with lock:
                    if test_is_running_cell[0]:
                        raise
                return None
        return wrap

    multi_callable = _unary_unary_multi_callable(self._channel)
    for index in range(test_constants.THREAD_CONCURRENCY):
        inner_response_future = multi_callable.future(
            request,
            metadata=(
                (b'test',
                 b'WaitingForSomeButNotAllConcurrentFutureInvocations'),))
        outer_response_future = pool.submit(wrap_future(inner_response_future))
        response_futures[index] = outer_response_future

    some_completed_response_futures_iterator = itertools.islice(
        futures.as_completed(response_futures),
        test_constants.THREAD_CONCURRENCY // 2)
    for response_future in some_completed_response_futures_iterator:
        self.assertEqual(expected_response, response_future.result())
    with lock:
        test_is_running_cell[0] = False
def process(self, asgs):
    msg_tmpl = self.data.get(
        'msg',
        'AutoScaleGroup does not meet org tag policy: {op}@{stop_date}')
    op = self.data.get('op', 'suspend')
    tag = self.data.get('tag', DEFAULT_TAG)
    date = self.data.get('days', 4)

    n = datetime.now(tz=tzutc())
    stop_date = n + timedelta(days=date)
    msg = msg_tmpl.format(
        op=op, stop_date=stop_date.strftime('%Y/%m/%d'))

    self.log.info("Tagging %d asgs for %s on %s" % (
        len(asgs), op, stop_date.strftime('%Y/%m/%d')))

    futures = {}
    with self.executor_factory(max_workers=10) as w:
        for a in asgs:
            futures[w.submit(self.process_asg, a, msg)] = a
        for f in as_completed(futures):
            if f.exception():
                # Log the asg that actually failed, not the last one iterated.
                log.exception("Exception processing asg:%s" % (
                    futures[f]['AutoScalingGroupName'],))
                continue
def access(config, accounts=()):
    """Check iam permissions for log export access in each account"""
    config = validate.callback(config)
    accounts_report = []

    def check_access(account):
        accounts_report.append(account)
        session = get_session(account['role'])
        identity = session.client('sts').get_caller_identity()
        account['account_id'] = identity['Account']
        account.pop('groups')
        account.pop('role')
        client = session.client('iam')
        policy_arn = identity['Arn']
        if policy_arn.count('/') > 1:
            policy_arn = policy_arn.rsplit('/', 1)[0]
        if ':sts:' in policy_arn:
            policy_arn = policy_arn.replace(':sts', ':iam')
        if ':assumed-role' in policy_arn:
            policy_arn = policy_arn.replace(':assumed-role', ':role')
        evaluation = client.simulate_principal_policy(
            PolicySourceArn=policy_arn,
            ActionNames=['logs:CreateExportTask'])['EvaluationResults']
        account['access'] = evaluation[0]['EvalDecision']

    with ThreadPoolExecutor(max_workers=16) as w:
        futures = {}
        for account in config.get('accounts', ()):
            if accounts and account['name'] not in accounts:
                continue
            futures[w.submit(check_access, account)] = None
        for f in as_completed(futures):
            pass

    accounts_report.sort(key=operator.itemgetter('access'), reverse=True)
    print(tabulate(accounts_report, headers='keys'))
def updateall(self, db, settings):
    # ref: http://pythonhosted.org/futures/
    from concurrent import futures
    from datetime import datetime, timedelta
    print("invoked")
    self.log.info("Update all method invoked")
    nobj = Notification()
    # loop through feeds that were not updated recently
    # http://stackoverflow.com/questions/4541629/how-to-create-a-datetime-equal-to-15-minutes-ago
    try:
        tilltime = Helper.datetotimestamp(datetime.now() - timedelta(minutes=15))
        feedlist = []
        for feed in db.feeds.find({'lastupdated': {'$lt': tilltime}}, {'items': 0}):
            # and call updatefeed for each feed url
            feedlist.append(feed['_id'])
        if len(feedlist) > 0:
            with futures.ThreadPoolExecutor(max_workers=5) as executor:
                future_to_url = dict(
                    (executor.submit(self.updatefeed, db, feed, settings), feed)
                    for feed in feedlist)
                for future in futures.as_completed(future_to_url):
                    feed = future_to_url[future]
                    if future.exception() is not None:
                        err = '%r generated an exception: %s' % (feed, future.exception())
                        nobj.adderror(err)
                        print(err)
                    else:
                        print('%r page is done' % (feed))
    except Exception as err:
        nobj.adderror(str(err))
        self.log.error("Update all feeds failed with error %s", str(err))
def handle(self, *args, **options):
    city_names = options['cities']
    n_threads = options['n_threads']
    if city_names[0] == 'ALL':
        city_names = services.get_active_city_names()

    # Instantiate weather service
    weather_service = weather.OpenWeatherMapService()

    def fetch_weather_update(city_name):
        try:
            made_update = services.fetch_weather_update(city_name=city_name,
                                                        weather_service=weather_service)
        except services.errors.CityNotFound:
            return self.style.ERROR(f'City "{city_name}" does not exist')
        except services.errors.CityIsDisabled:
            return self.style.ERROR(f'City "{city_name}" is disabled')
        if made_update:
            return self.style.SUCCESS('Successfully fetched new weather update for city '
                                      f'"{city_name}"')
        else:
            return self.style.WARNING(f'No new weather update found for city "{city_name}"')

    pool = futures.ThreadPoolExecutor(n_threads)
    jobs = []
    for city_name in city_names:
        jobs.append(pool.submit(fetch_weather_update, city_name))
    for job in futures.as_completed(jobs):
        self.stdout.write(job.result())
def process(self, buckets):
    from c7n.mu import LambdaManager
    from c7n.ufuncs.s3crypt import get_function

    func = get_function(
        None, self.data.get('role', self.manager.config.assume_role))

    # Publish function to all of our buckets' regions
    region_funcs = {}
    regions = set([
        b.get('LocationConstraint', 'us-east-1') for b in buckets])
    for r in regions:
        lambda_mgr = LambdaManager(
            functools.partial(self.manager.session_factory, region=r))
        region_funcs[r] = lambda_mgr.publish(func)

    with self.executor_factory(max_workers=3) as w:
        results = []
        futures = []
        for b in buckets:
            futures.append(
                w.submit(
                    self.process_bucket,
                    region_funcs[b.get('LocationConstraint', 'us-east-1')],
                    b))
        for f in as_completed(futures):
            if f.exception():
                log.exception(
                    "Error attaching lambda-encrypt %s" % (f.exception()))
                continue
            results.append(f.result())
        return filter(None, results)
def generate_normalized_wiggle_files(project_folder, max_proc):
    parameter_dict = _read_parameters(project_folder)
    # create normalized coverage folder if it does not exist
    wiggle_folder = "{}/normalized_coverage".format(project_folder)
    if not exists(wiggle_folder):
        makedirs(wiggle_folder)
    # Generate coverage files in parallel
    print("** Generating normalized coverage files for {} libraries...".format(
        len(parameter_dict["libraries"])), flush=True)
    t_start = time()
    with futures.ProcessPoolExecutor(max_workers=max_proc) as executor:
        future_to_lib_name = {
            executor.submit(
                _generate_normalized_wiggle_file_for_lib,
                lib_name,
                lib["bam_file"],
                parameter_dict["paired_end"],
                parameter_dict["max_insert_size"],
                lib["size_factor"],
                wiggle_folder): lib_name
            for lib_name, lib in parameter_dict["libraries"].items()}
        for future in futures.as_completed(future_to_lib_name):
            lib_name = future_to_lib_name[future]
            print("* Coverage files for library {} generated.".format(lib_name),
                  flush=True)
    t_end = time()
    print("Coverage file generation finished in {} seconds.".format(
        t_end - t_start), flush=True)
def process(self, resources):
    # Legacy
    msg = self.data.get('msg')
    msg = self.data.get('value') or msg

    tag = self.data.get('tag', DEFAULT_TAG)
    tag = self.data.get('key') or tag

    # Support setting multiple tags in a single go with a mapping
    tags = self.data.get('tags')
    if tags is None:
        tags = []
    else:
        tags = [{'Key': k, 'Value': v} for k, v in tags.items()]

    if msg:
        tags.append({'Key': tag, 'Value': msg})

    batch_size = self.data.get('batch_size', self.batch_size)

    with self.executor_factory(max_workers=self.concurrency) as w:
        futures = []
        for resource_set in utils.chunks(resources, size=batch_size):
            futures.append(
                w.submit(self.process_resource_set, resource_set, tags))
        for f in as_completed(futures):
            if f.exception():
                self.log.error(
                    "Exception removing tags: %s on resources:%s \n %s" % (
                        tags, self.id_key, f.exception()))
def process(self, resources):
    # Move this to policy? / no resources bypasses actions?
    if not len(resources):
        return

    msg_tmpl = self.data.get(
        'msg', 'Resource does not meet policy: {op}@{action_date}')

    op = self.data.get('op', 'stop')
    tag = self.data.get('tag', DEFAULT_TAG)
    date = self.data.get('days', 4)

    n = datetime.now(tz=tzutc())
    action_date = n + timedelta(days=date)
    msg = msg_tmpl.format(
        op=op, action_date=action_date.strftime('%Y/%m/%d'))

    self.log.info("Tagging %d resources for %s on %s" % (
        len(resources), op, action_date.strftime('%Y/%m/%d')))

    tags = [{'Key': tag, 'Value': msg}]

    with self.executor_factory(max_workers=2) as w:
        futures = []
        for resource_set in utils.chunks(resources, size=200):
            futures.append(
                w.submit(self.process_resource_set, resource_set, tags))
        for f in as_completed(futures):
            if f.exception():
                self.log.error(
                    "Exception tagging resource set: %s \n %s" % (
                        tags, f.exception()))
def record_set(session_factory, bucket, key_prefix, start_date):
    """Retrieve all s3 records for the given policy output url

    From the given start date.
    """
    s3 = local_session(session_factory).client('s3')

    records = []
    key_count = 0

    marker = key_prefix.strip("/") + "/" + start_date.strftime(
        '%Y/%m/%d/00') + "/resources.json.gz"

    p = s3.get_paginator('list_objects').paginate(
        Bucket=bucket,
        Prefix=key_prefix.strip('/') + '/',
        Marker=marker
    )

    with ThreadPoolExecutor(max_workers=20) as w:
        for key_set in p:
            if 'Contents' not in key_set:
                continue
            keys = [k for k in key_set['Contents']
                    if k['Key'].endswith('resources.json.gz')]
            key_count += len(keys)
            futures = map(lambda k: w.submit(
                get_records, bucket, k, session_factory), keys)

            for f in as_completed(futures):
                records.extend(f.result())

            log.info("Fetched %d records across %d files" % (
                len(records), key_count))
    return records
def process_messages(self, messages):
    future_to_message = {}
    to_delete = []
    self.logger.debug('processing %d messages', len(messages))
    for message in messages:
        # ThreadPoolExecutor will throw a RuntimeError if we try to
        # submit while it's shutting down. If we encounter a RuntimeError,
        # immediately stop trying to submit new tasks; they will get
        # requeued after the interval configured on the queue's policy.
        try:
            future = self.pool.submit(self.func, message)
        except RuntimeError:
            self.logger.exception('cannot submit jobs to pool')
            raise
        else:
            future_to_message[future] = message
    for future in futures.as_completed(future_to_message,
                                       timeout=self.timeout):
        message = future_to_message[future]
        try:
            future.result()
        except:
            self.logger.exception('exception processing message %s',
                                  message['MessageId'])
        else:
            to_delete.append(message)
    return to_delete
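# Standalone sketch (assumed, not from the original source) of the behaviour the
# comment above relies on: ThreadPoolExecutor.submit() raises RuntimeError once
# the pool has been shut down.
from concurrent.futures import ThreadPoolExecutor

pool = ThreadPoolExecutor(max_workers=1)
pool.shutdown(wait=True)
try:
    pool.submit(print, "too late")
except RuntimeError as exc:
    print("submit after shutdown raised:", exc)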
def run(config, use, output_dir, accounts,
        tags, region, policy, policy_tags, cache_period,
        metrics, dryrun, debug, verbose):
    """run a custodian policy across accounts"""
    accounts_config, custodian_config, executor = init(
        config, use, debug, verbose, accounts, tags, policy,
        policy_tags=policy_tags)
    policy_counts = Counter()
    with executor(max_workers=WORKER_COUNT) as w:
        futures = {}
        for a in accounts_config['accounts']:
            for r in resolve_regions(region or a.get('regions', ())):
                futures[w.submit(
                    run_account, a, r,
                    custodian_config, output_dir,
                    cache_period, metrics, dryrun, debug)] = (a, r)
        for f in as_completed(futures):
            a, r = futures[f]
            if f.exception():
                if debug:
                    raise
                log.warning(
                    "Error running policy in %s @ %s exception: %s",
                    a['name'], r, f.exception())
                continue
            for p, count in f.result().items():
                policy_counts[p] += count

    log.info("Policy resource counts %s" % policy_counts)
def _poll_run_states(self):
    # in every iteration the states of all unfinished runs are requested once
    while not self._shutdown.is_set():
        start = time()
        states = {}

        with self._web_interface._unfinished_runs_lock:
            for run_id in self._web_interface._unfinished_runs.keys():
                state_future = self._state_poll_executor.submit(
                    self._web_interface._is_finished, run_id)
                states[state_future] = run_id

        # Collect states of runs
        for state_future in as_completed(states.keys()):
            run_id = states[state_future]
            state = state_future.result()

            if state == "FINISHED" or state == "UNKNOWN":
                self._web_interface._download_result_async(run_id)
            elif state == "ERROR":
                self._web_interface._run_failed(run_id)

        end = time()
        duration = end - start
        if duration < self._result_poll_interval and not self._shutdown.is_set():
            self._shutdown.wait(self._result_poll_interval - duration)
def test_no_timeout(self):
    def wait_test():
        while not future1._waiters:
            pass
        call1.set_can()
        call2.set_can()

    call1 = Call(manual_finish=True)
    call2 = Call(manual_finish=True)
    try:
        future1 = self.executor.submit(call1)
        future2 = self.executor.submit(call2)

        t = threading.Thread(target=wait_test)
        t.start()
        completed = set(futures.as_completed(
            [CANCELLED_AND_NOTIFIED_FUTURE,
             EXCEPTION_FUTURE,
             SUCCESSFUL_FUTURE,
             future1, future2]))
        self.assertEqual(set(
            [CANCELLED_AND_NOTIFIED_FUTURE,
             EXCEPTION_FUTURE,
             SUCCESSFUL_FUTURE,
             future1, future2]),
            completed)
    finally:
        call1.close()
        call2.close()
def process(self, resources, event=None):
    days = self.data.get('days', 14)
    duration = timedelta(days)

    self.metric = self.data['name']
    self.end = datetime.utcnow()
    self.start = self.end - duration
    self.period = int(self.data.get('period', duration.total_seconds()))
    self.statistics = self.data.get('statistics', 'Average')
    self.model = self.manager.get_model()
    self.op = OPERATORS[self.data.get('op', 'less-than')]
    self.value = self.data['value']

    ns = self.data.get('namespace')
    if not ns:
        ns = getattr(self.model, 'default_namespace', None)
        if not ns:
            ns = self.DEFAULT_NAMESPACE[self.model.service]
    self.namespace = ns

    self.log.debug("Querying metrics for %d", len(resources))
    matched = []
    with self.executor_factory(max_workers=3) as w:
        futures = []
        for resource_set in chunks(resources, 50):
            futures.append(
                w.submit(self.process_resource_set, resource_set))
        for f in as_completed(futures):
            if f.exception():
                self.log.warning(
                    "CW Retrieval error: %s" % f.exception())
                continue
            matched.extend(f.result())
    return matched
def process(self, resources):
    resources = self.filter_table_state(
        resources, self.valid_status)
    if not len(resources):
        return

    c = local_session(self.manager.session_factory).client('dynamodb')
    futures = {}
    prefix = self.data.get('prefix', 'Backup')

    with self.executor_factory(max_workers=2) as w:
        for t in resources:
            futures[w.submit(
                c.create_backup,
                BackupName=snapshot_identifier(prefix, t['TableName']),
                TableName=t['TableName'])] = t
        for f in as_completed(futures):
            t = futures[f]
            if f.exception():
                self.manager.log.warning(
                    "Could not complete DynamoDB backup table:%s", t)
                continue
            arn = f.result()['BackupDetails']['BackupArn']
            t['c7n:BackupArn'] = arn
def play(num_questions, path_directory, path_screenshot): # Create a new Quiz quiz = Quiz(path_directory) # Create a ThreadPool to parallelize the work from here on pool = ThreadPoolExecutor(max_workers=4) # For each question for i in range(1, num_questions + 1): # Waiting for user input... try: c = input("Press enter to evaluate a new question, e to exit: ") except KeyboardInterrupt: # If CTRL+C, break print("") # Go to a new line break # If the user decided to exit, break if c == 'e': break # If we are here to evaluate only one question if path_screenshot: # Load the screenshot from disk as cv2 grey object screenshot = Screenshot.load_image(path_screenshot) # Or we have to process an entire directory of questions elif path_directory: # Go through all screenshots in the specified directory filename = f"{path_directory}/Question-{i}.png" screenshot = Screenshot.load_image(filename) # If there isn't neither a screenshots nor a directory else: # Define the path for a new screenshot file filename = f"{quiz.folder_name}/Question-{i}.png" # Take a black-n-white screenshot screenshot = Screenshot.take_screenshot(filename) # Create a new Question object and extract the question from the screenshot question = quiz.new_question(Screenshot.extract_question(screenshot)) # Extract all three answers from the screenshot, in three different threads future_answers = { pool.submit(Screenshot.extract_answer, screenshot, question, position): position for position in range(3) } future_question = pool.submit(Sanitize.clean_question(question)) # Wait until all threads are done for future in as_completed(future_answers): pass print(f"\nQuestion n.{i}: {question.get_text()}") print( f"Answers: [{question.get_answer(0).get_text()}, {question.get_answer(1).get_text()}, {question.get_answer(2).get_text()}]" ) # Briefly ... 
later # Define the query URL query_url = Scraping.define_url(question.get_text()) # Get search results from Google google_results = Scraping.search(query_url) # Parallelize the pattern matching process with all the results future_guess = { pool.submit(Scraping.guess_answer, google_results, question.get_answer(position)): position for position in range(3) } # If at least one result was found for future in as_completed(future_guess): if future.result(): question.one_match = True # If at least one answer has a match if question.one_match: # Get and print the answer with the highest matches number guessed = question.get_answer_max_matches() print( f"\n{Style.BRIGHT}{Fore.GREEN}{guessed.get_text():>40} {Fore.CYAN}{guessed.get_matches():<40}{Fore.RESET}{Style.RESET_ALL}" ) # Save a reference to the guessed answer (non-zero indexed) question.set_guessed_answer(question.answers.index(guessed) + 1) else: print( f"{Style.DIM}{Fore.YELLOW}No match found, trying a more in depth analysis...{Fore.RESET}{Style.RESET_ALL}\n" ) # Perform a query built concatenating the question and each answer # Print sort-of table header (17+Answer+17, Score+5, 1+Results+2, Total) # I know it's ugly, maybe I'll use Rich lib print( f"{Style.BRIGHT} Answer Score Results Total{Style.RESET_ALL}" ) # Check if this is an usual question or a "negated" question (explaination below) question.usual_question = "NON" not in question.get_text() # Parallelize the pattern matching process with all the results future_calculation = { pool.submit( Scraping.calculate_concat, question.get_text() if question.usual_question else question.get_text().replace("NON", ''), question.get_answer(position)): position for position in range(3) } for future in as_completed(future_calculation): print(future.result()) # If the answers scored the same, then something went wrong if question.get_answer(0).score == question.get_answer( 1).score == question.get_answer(2).score: print( f"\n{Style.BRIGHT}{Fore.RED}Choose a random answer, the search was not successful!{Fore.RESET}{Style.RESET_ALL}" ) # why not suggest a random answer? # Otherwise, let's assume the answer with the highest score is fair else: guessed = question.get_answer_max_score( ) if question.usual_question else question.get_answer_min_score( ) print( f"\n{Style.BRIGHT}{Fore.GREEN}{guessed.get_text():>40} {Fore.CYAN}{guessed.score:<40}{Fore.RESET}{Style.RESET_ALL}" ) # Save a reference to the guessed answer (non-zero indexed) question.set_guessed_answer( question.answers.index(guessed) + 1) # About the algorithm for "negated" questions ("which of these... not..."): # if in the question is required to indicate the answer that does NOT belong # to a certain category, then a Google search concatenating the question without # the "not" ("NON", in italian) word and the answer is executed and then # the answer that obtains the minimum score instead of the maximum, is taken. # Save the (real) correct answer for debug and analysis purpose, # but only if the report file doesn't already exist if not quiz.report_exists: question.set_correct_answer( int(input("\nWhat was the correct answer? (1,2,3): "))) # Print a bunch (80) of underscore to separate different question print( "________________________________________________________________________________\n" ) # Shutdown the ThreadPool pool.shutdown() # Save the report if it doesn't already exist if not quiz.report_exists: quiz.save_report()
def map_unordered(cls, function, items, multiprocess=False, file=None, step=100, ipython_widget=False, multiprocessing_start_method=None): """Map function over items, reporting the progress. Does a `map` operation while displaying a progress bar with percentage complete. The map operation may run on arbitrary order on the items, and the results may be returned in arbitrary order. :: def work(i): print(i) ProgressBar.map(work, range(50)) Parameters ---------- function : function Function to call for each step items : sequence Sequence where each element is a tuple of arguments to pass to *function*. multiprocess : bool, int, optional If `True`, use the `multiprocessing` module to distribute each task to a different processor core. If a number greater than 1, then use that number of cores. ipython_widget : bool, optional If `True`, the progress bar will display as an IPython notebook widget. file : writable file-like, optional The file to write the progress bar to. Defaults to `sys.stdout`. If ``file`` is not a tty (as determined by calling its `isatty` member, if any), the scrollbar will be completely silent. step : int, optional Update the progress bar at least every *step* steps (default: 100). If ``multiprocess`` is `True`, this will affect the size of the chunks of ``items`` that are submitted as separate tasks to the process pool. A large step size may make the job complete faster if ``items`` is very long. multiprocessing_start_method : str, optional Useful primarily for testing; if in doubt leave it as the default. When using multiprocessing, certain anomalies occur when starting processes with the "spawn" method (the only option on Windows); other anomalies occur with the "fork" method (the default on Linux). """ # concurrent.futures import here to avoid import failure when running # in pyodide/Emscripten from concurrent.futures import ProcessPoolExecutor, as_completed results = [] if file is None: file = _get_stdout() with cls(len(items), ipython_widget=ipython_widget, file=file) as bar: if bar._ipython_widget: chunksize = step else: default_step = max(int(float(len(items)) / bar._bar_length), 1) chunksize = min(default_step, step) if not multiprocess or multiprocess < 1: for i, item in enumerate(items): results.append(function(item)) if (i % chunksize) == 0: bar.update(i) else: ctx = multiprocessing.get_context(multiprocessing_start_method) kwargs = dict(mp_context=ctx) with ProcessPoolExecutor( max_workers=(int(multiprocess) if multiprocess is not True else None), **kwargs) as p: for i, f in enumerate( as_completed( p.submit(function, item) for item in items)): bar.update(i) results.append(f.result()) return results
# Fragment of a larger loop over file_list: for each .jpg a face-detection task is
# submitted to the thread pool, then the code waits for all tasks to finish.
            data = {
                'code': file.replace('.jpg', ''),
                'image': file_path,
                'image_type': 'BASE64',
                'options': {
                    'max_face_num': 1,
                    'face_field': "age,beauty,gender",
                },
                'aip': aip_list[it],
                'mongo': mongo_conn
            }
            it += 1
            it %= len(aip_list)
            # Submit the task to the thread pool
            work = executor.submit(detect_face, data)
            all_work.append(work)
        else:
            util.print_e('The album contains a folder: {}'.format(file))
        util.process_bar(index + 1, len(file_list),
                         '{} tasks submitted'.format(index + 1))

    # Wait for all tasks in the thread pool to finish
    util.print_a('Waiting for all tasks to finish')
    rowcount = completed_work = 0
    for work in as_completed(all_work):
        # as_completed blocks until each future finishes, so results are collected here
        rowcount += work.result()
        completed_work += 1
        util.process_bar(completed_work, len(file_list),
                         '%d tasks finished, %d records modified' % (completed_work, rowcount))
def scrap(start=1):
    # Choose the start block to scrape
    if start > 1:
        # Continue from the last break
        results = np.load('results' + str(start) + '.npy').item()
    else:
        # Start from the beginning of the block
        results = dict()

    # Save the current block
    checkpoint = start

    try:
        for i in range(0, (len(urls) - start) // fact + 1):
            start_time = time.time()
            # Multiple threads for scraping
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                end = start + fact * (i + 1)
                if end > len(urls):
                    end = len(urls) + 1
                # Create threads
                futures = {
                    executor.submit(loadurl, url)
                    for url in urls[checkpoint:end]
                }
                # as_completed() gives you the threads once finished
                for f in as_completed(futures):
                    # Get the results
                    rs = f.result()
                    if rs[1] in results.keys():
                        results[rs[1]] += rs[0]
                    else:
                        results[rs[1]] = rs[0]

            elapsed_time = time.time() - start_time
            # Calculate time elapsed for this scrape
            e = int(time.time() - start_time)
            # Everything okay, update the latest checkpoint
            checkpoint = end
            # Save result to file
            np.save('results' + str(checkpoint) + '.npy', results)
            # Only keep the latest three files
            remove('results' + str(checkpoint - fact * 3) + '.npy')
            print(
                "Range: " + str(checkpoint) + " ",
                '{:02d}:{:02d}:{:02d}'.format(e // 3600, (e % 3600 // 60), e % 60))
    except:
        if debug:
            traceback.print_exc()
        print("Exception, re-call function main(%s)" % (str(checkpoint)))
        time.sleep(3)
        # An exception happened, restart from the last checkpoint
        scrap(checkpoint)
from concurrent import futures


def main():
    # `func` is assumed to be defined at module level so it can be pickled
    # by the process pool workers.
    executor = futures.ProcessPoolExecutor(max_workers=4)
    args = range(10)
    fs = [executor.submit(func, arg) for arg in args]
    for f in futures.as_completed(fs):
        print(f.result())
def sync( api: BaiduPCSApi, localdir: str, remotedir: str, max_workers: int = CPU_NUM, slice_size: int = DEFAULT_SLICE_SIZE, show_progress: bool = True, ): localdir = Path(localdir).as_posix() remotedir = Path(remotedir).as_posix() is_file = api.is_file(remotedir) assert not is_file, "remotedir must be a directory" if not api.exists(remotedir): all_pcs_files = {} else: all_pcs_files = { pcs_file.path[len(remotedir) + 1:]: pcs_file for pcs_file in recursive_list(api, remotedir) } fts: List[FromTo] = [] check_list: List[Tuple[str, PcsFile]] = [] all_localpaths = set() for localpath in walk(localdir): path = localpath[len(localdir) + 1:] all_localpaths.add(path) if path not in all_pcs_files: fts.append(FromTo(localpath, join_path(remotedir, path))) else: check_list.append((localpath, all_pcs_files[path])) semaphore = Semaphore(max_workers) with ThreadPoolExecutor(max_workers=CPU_NUM) as executor: tasks = {} for lp, pf in check_list: semaphore.acquire() fut = executor.submit(sure_release, semaphore, check_file_md5, lp, pf) tasks[fut] = (lp, pf) for fut in as_completed(tasks): is_equal = fut.result() lp, pf = tasks[fut] if not is_equal: fts.append(FromTo(lp, pf.path)) _upload( api, fts, max_workers=max_workers, slice_size=slice_size, ignore_existing=False, show_progress=show_progress, ) to_deletes = [] for rp in all_pcs_files.keys(): if rp not in all_localpaths: to_deletes.append(all_pcs_files[rp].path) if to_deletes: api.remove(*to_deletes) print(f"Delete: [i]{len(to_deletes)}[/i] remote paths")
X_train, X_test, y_train, y_test = train_test_split(X, y_true.reshape(-1, 1),
                                                    test_size=0.15,
                                                    random_state=1869097)

N_list = [5, 10, 15, 20, 25, 30]
K = 5

print("N = [5, 10, 15, 20, 25, 30]")
print("sigma = [0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.5, 1.8]")
print("rho = [0.00001, 0.00005, 0.0001, 0.0005, 0.001]\n")

ncpus = cpu_count()
results = []
with ProcessPoolExecutor(max_workers=ncpus) as executor:
    futures = list(
        (executor.submit(grid_search, X_train, y_train, N, K) for N in N_list))
    for future in as_completed(futures):
        results += future.result()

final_res = pd.DataFrame(results, columns=[
    'gradient', 'K', 'N', 'sigma', 'rho', 'success',
    'train_error', 'train_error_fit', 'validation_error',
    'time_exec(s)', 'nfev', 'nit', 'njev'
])
final_res.to_csv('KFOLD_MLP.csv', index=False)
                package=package, base=base.fullname))
            continue

        build_order = get_build_order(package, orig)
        up_map = {orig: base.fullname}
        if args.remote is not None:
            print('Update from "%s" to "%s"' % (orig, base.fullname))

        for group in build_order:
            with ThreadPoolExecutor(max_workers=4) as executor:
                pkgs = (executor.submit(Worker.run, worker, ConanPackge(package),
                                        package_urls[package], up_map, args.remote)
                        for package in group)
                add = {}
                for future in as_completed(pkgs):
                    version = future.result()
                    if version is not None:
                        add[package] = version
                        if args.remote is not None:
                            print('Update from "%s" to "%s"' % (orig, version))
                up_map.update(add)

        conanfile = os.path.join(package, 'conanfile.txt')
        if os.path.isfile(conanfile):
            with open(conanfile, 'r') as content_file:
                content = content_file.read()
            for old, val in up_map.items():
                content = content.replace('\n' + old, '\n' + val)
            with open(conanfile, 'w') as content_file:
                content_file.write(content)
def as_completed(self):
    for f in futures.as_completed(self._tasks):
        self._tasks.remove(f)
        yield f.result()
from concurrent.futures import ProcessPoolExecutor, wait, ALL_COMPLETED, FIRST_COMPLETED, \
    as_completed
import time, random


def do_sth(i):
    time.sleep(random.random() * 2)
    return i * i


if __name__ == '__main__':
    ppe = ProcessPoolExecutor(3)
    objs = [ppe.submit(do_sth, i) for i in range(1, 5)]
    future_iterator = as_completed(objs)  # Personal note: "as completed" means "as each future finishes"
    for future in future_iterator:
        print(future.result())
    """
    9
    4
    16
    1
    """

"""
Summary
The standard library module concurrent.futures also provides two functions: wait() and as_completed().
as_completed(fs, timeout=None): converts the given sequence of Future instances into an iterator.
"""
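# Companion sketch (assumed, not part of the snippet above): the summary also names
# wait(); this shows how wait() blocks on a batch of futures and returns
# (done, not_done) sets, using a toy worker analogous to do_sth.
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED
import time, random


def square_later(i):
    time.sleep(random.random())
    return i * i


if __name__ == '__main__':
    with ThreadPoolExecutor(3) as tpe:
        fs = [tpe.submit(square_later, i) for i in range(1, 5)]
        # Block until the first future finishes.
        done, not_done = wait(fs, return_when=FIRST_COMPLETED)
        print(len(done), len(not_done))
        # Block until every future finishes (default ALL_COMPLETED).
        done, not_done = wait(fs)
        print(sorted(f.result() for f in done))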
def warmup(data, out_trace, registries, threads):
    dedup = {}
    # dup_cnt = 0
    total_cnt = 0
    trace = {}
    results = []
    process_data = []
    global ring
    ring = HashRing(nodes=registries)
    manifs_cnt = 0

    for request in data:
        unique = True
        manifs = False
        if (request['method']) == 'GET':
            if 'manifest' in request['uri']:
                manifs = True
            uri = request['uri']
            layer_id = uri.split('/')[-1]
            total_cnt += 1
            try:
                dup_cnt = dedup[layer_id]
                # dup_cnt += 1
                # dedup[layer_id] += 1
                unique = False
            except Exception as e:
                dedup[layer_id] = 1
            if manifs:
                manifs_cnt += 1
            if unique:
                registry_tmp = ring.get_node(layer_id)  # which registry should store this layer/manifest?
                process_data.append((registry_tmp, request))

    print("total warmup unique requests:", len(process_data))
    print("unique manifest cnt: ", manifs_cnt)

    # split request list into sublists
    # n = len(process_data)
    n = 100
    process_slices = [process_data[i:i + n] for i in xrange(0, len(process_data), n)]

    for s in process_slices:
        with ProcessPoolExecutor(max_workers=threads) as executor:
            futures = [executor.submit(send_warmup_thread, req) for req in s]
            for future in as_completed(futures):
                # print(future.result())
                try:
                    x = future.result()
                    for k in x['trace']:
                        if x['trace'][k] != 'bad':
                            trace[k] = x['trace'][k]
                    results.append(x['result'])
                except Exception as e:
                    print('warmup: something generated an exception: %s', e)
        # break

    stats(results)
    # time.sleep(600)
    with open(out_trace, 'w') as f:
        json.dump(trace, f)
    stats(results)
    with open('warmup_push_performance.json', 'w') as f:
        json.dump(results, f)
    print "max threads:" + str(threads)
    print 'unique count: ' + str(len(dedup))
    print 'total count: ' + str(total_cnt)
    print "total warmup unique requests: (for get layer/manifest requests)" + str(len(process_data))
from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED

# get_html(seconds) is assumed to be defined above: it sleeps and returns its argument.
executor = ThreadPoolExecutor(max_workers=2)
# submit() hands the function to the thread pool and returns immediately
# task1 = executor.submit(get_html, (3))  # task1 is a Future object
# task2 = executor.submit(get_html, (2))

# Collect the return values of the tasks that have already succeeded
urls = [3, 2, 4]
all_task = [executor.submit(get_html, (url)) for url in urls]

# # Wait for the futures in the given sequence to complete
# wait(all_task, return_when=FIRST_COMPLETED)
# print("main")

# Whichever task finishes first is printed first
for future in as_completed(all_task):
    data = future.result()
    print("get {} page".format(data))
"""
Output:
get page 2 success
get 2 page
get page 3 success
get 3 page
get page 4 success
get 4 page
"""

# Use executor.map to get the values of completed tasks;
# data is printed in the same order as urls
# for data in executor.map(get_html, urls):
#     print("get {} page".format(data))
def blocks(self, start=None, stop=None, max_batch_size=None, threading=False, thread_num=8, only_ops=False, only_virtual_ops=False): """ Yields blocks starting from ``start``. :param int start: Starting block :param int stop: Stop at this block :param int max_batch_size: only for appbase nodes. When not None, batch calls of are used. Cannot combine with threading :param bool threading: Enables threading. Cannot be combined with batch calls :param int thread_num: Defines the number of threads, when `threading` is set. :param bool only_ops: Only yielding operations, when set to True (default: False) :param bool only_virtual_ops: Only yield virtual operations (default: False) .. note:: If you want instant confirmation, you need to instantiate class:`beem.blockchain.Blockchain` with ``mode="head"``, otherwise, the call will wait until confirmed in an irreversible block. """ # Let's find out how often blocks are generated! current_block = self.get_current_block() current_block_num = current_block.block_num if not start: start = current_block_num head_block_reached = False # We are going to loop indefinitely while True: # Get chain properies to identify the if stop: head_block = stop else: current_block_num = self.get_current_block_num() head_block = current_block_num if threading and FUTURES_MODULE and not head_block_reached: pool = ThreadPoolExecutor(max_workers=thread_num + 1) # disable autoclean auto_clean = current_block.get_cache_auto_clean() current_block.set_cache_auto_clean(False) latest_block = 0 for blocknum in range(start, head_block + 1, thread_num): futures = [] i = blocknum while i < blocknum + thread_num and i <= head_block: futures.append( pool.submit(Block, i, only_ops=only_ops, only_virtual_ops=only_virtual_ops, steem_instance=self.steem)) i += 1 results = [r.result() for r in as_completed(futures)] block_nums = [] for b in results: block_nums.append(int(b.identifier)) if latest_block < int(b.identifier): latest_block = int(b.identifier) from operator import itemgetter blocks = sorted(results, key=itemgetter('id')) for b in blocks: yield b current_block.clear_cache_from_expired_items() if latest_block < head_block: for blocknum in range(latest_block, head_block + 1): block = Block(blocknum, only_ops=only_ops, only_virtual_ops=only_virtual_ops, steem_instance=self.steem) yield block current_block.set_cache_auto_clean(auto_clean) elif max_batch_size is not None and ( head_block - start) >= max_batch_size and not head_block_reached: if not self.steem.is_connected(): return None self.steem.rpc.set_next_node_on_empty_reply(False) latest_block = start - 1 batches = max_batch_size for blocknumblock in range(start, head_block + 1, batches): # Get full block if (head_block - blocknumblock) < batches: batches = head_block - blocknumblock + 1 for blocknum in range(blocknumblock, blocknumblock + batches - 1): if only_virtual_ops: if self.steem.rpc.get_use_appbase(): # self.steem.rpc.get_ops_in_block({"block_num": blocknum, 'only_virtual': only_virtual_ops}, api="account_history", add_to_queue=True) self.steem.rpc.get_ops_in_block( blocknum, only_virtual_ops, add_to_queue=True) else: self.steem.rpc.get_ops_in_block( blocknum, only_virtual_ops, add_to_queue=True) else: if self.steem.rpc.get_use_appbase(): self.steem.rpc.get_block( {"block_num": blocknum}, api="block", add_to_queue=True) else: self.steem.rpc.get_block(blocknum, add_to_queue=True) latest_block = blocknum if batches >= 1: latest_block += 1 if latest_block <= head_block: if only_virtual_ops: if self.steem.rpc.get_use_appbase(): 
# self.steem.rpc.get_ops_in_block({"block_num": blocknum, 'only_virtual': only_virtual_ops}, api="account_history", add_to_queue=False) block_batch = self.steem.rpc.get_ops_in_block( blocknum, only_virtual_ops, add_to_queue=False) else: block_batch = self.steem.rpc.get_ops_in_block( blocknum, only_virtual_ops, add_to_queue=False) else: if self.steem.rpc.get_use_appbase(): block_batch = self.steem.rpc.get_block( {"block_num": latest_block}, api="block", add_to_queue=False) else: block_batch = self.steem.rpc.get_block( latest_block, add_to_queue=False) if not bool(block_batch): raise BatchedCallsNotSupported() blocknum = latest_block - len(block_batch) + 1 if not isinstance(block_batch, list): block_batch = [block_batch] for block in block_batch: if self.steem.rpc.get_use_appbase(): if only_virtual_ops: block = block["ops"] else: block = block["block"] block["id"] = blocknum yield Block(block, only_ops=only_ops, only_virtual_ops=only_virtual_ops, steem_instance=self.steem) blocknum += 1 else: # Blocks from start until head block for blocknum in range(start, head_block + 1): # Get full block block = self.wait_for_and_get_block( blocknum, only_ops=only_ops, only_virtual_ops=only_virtual_ops) yield block # Set new start start = head_block + 1 head_block_reached = True if stop and start > stop: # raise StopIteration return # Sleep for one block time.sleep(self.block_interval)
def get_multi_thread(nums):
    with ThreadPoolExecutor() as e:
        futures = [e.submit(fibonacci, num) for num in nums]
        for future in as_completed(futures):
            print(future.result())
def transform_data(from_client, from_project, from_logstore, from_time, to_time=None, to_client=None, to_project=None, to_logstore=None, shard_list=None, config=None, batch_size=None, compress=None, cg_name=None, c_name=None, cg_heartbeat_interval=None, cg_data_fetch_interval=None, cg_in_order=None, cg_worker_pool_size=None): """ transform data from one logstore to another one (could be the same or in different region), the time is log received time on server side. """ if not config: logger.info( "transform_data: config is not configured, use copy data by default." ) return copy_data(from_client, from_project, from_logstore, from_time, to_time=to_time, to_client=to_client, to_project=to_project, to_logstore=to_logstore, shard_list=shard_list, batch_size=batch_size, compress=compress) to_client = to_client or from_client # increase the timeout to 2 min at least from_client.timeout = max(from_client.timeout, 120) to_client.timeout = max(to_client.timeout, 120) to_project = to_project or from_project to_logstore = to_logstore or from_logstore if not cg_name: # batch mode to_time = to_time or "end" cpu_count = multiprocessing.cpu_count() * 2 shards = from_client.list_shards(from_project, from_logstore).get_shards_info() current_shards = [str(shard['shardID']) for shard in shards] target_shards = _parse_shard_list(shard_list, current_shards) worker_size = min(cpu_count, len(target_shards)) result = dict() total_count = 0 total_removed = 0 with ProcessPoolExecutor(max_workers=worker_size) as pool: futures = [ pool.submit(transform_worker, from_client, from_project, from_logstore, shard, from_time, to_time, config, to_client, to_project, to_logstore, batch_size=batch_size, compress=compress) for shard in target_shards ] for future in as_completed(futures): if future.exception(): logger.error( "get error when transforming data: {0}".format( future.exception())) else: partition, count, removed, processed, failed = future.result( ) total_count += count total_removed += removed if count: result[partition] = { "total_count": count, "transformed": processed, "removed": removed, "failed": failed } return LogResponse({}, {"total_count": total_count, "shards": result}) else: # consumer group mode c_name = c_name or "transform_data_{0}".format( multiprocessing.current_process().pid) cg_heartbeat_interval = cg_heartbeat_interval or 20 cg_data_fetch_interval = cg_data_fetch_interval or 2 cg_in_order = False if cg_in_order is None else cg_in_order cg_worker_pool_size = cg_worker_pool_size or 3 option = LogHubConfig( from_client._endpoint, from_client._accessKeyId, from_client._accessKey, from_project, from_logstore, cg_name, c_name, cursor_position=CursorPosition.SPECIAL_TIMER_CURSOR, cursor_start_time=from_time, cursor_end_time=to_time, heartbeat_interval=cg_heartbeat_interval, data_fetch_interval=cg_data_fetch_interval, in_order=cg_in_order, worker_pool_size=cg_worker_pool_size) TransformDataConsumer.set_transform_options(config, to_client, to_project, to_logstore) result = {"total_count": 0, "shards": {}} l = RLock() def status_updator(shard_id, count=0, removed=0, processed=0, failed=0): logger.info( "status update is called, shard: {0}, count: {1}, removed: {2}, processed: {3}, failed: {4}" .format(shard_id, count, removed, processed, failed)) with l: result["total_count"] += count if shard_id in result["shards"]: data = result["shards"][shard_id] result["shards"][shard_id] = { "total_count": data["total_count"] + count, "transformed": data["transformed"] + processed, "removed": data["removed"] + 
removed, "failed": data["failed"] + failed } else: result["shards"][shard_id] = { "total_count": count, "transformed": processed, "removed": removed, "failed": failed } worker = ConsumerWorker(TransformDataConsumer, consumer_option=option, args=(status_updator, )) worker.start() try: while worker.is_alive(): worker.join(timeout=60) logger.info( "transform_data: worker exit unexpected, try to shutdown it") worker.shutdown() except KeyboardInterrupt: logger.info("transform_data: *** try to exit **** ") print("try to stop transforming data.") worker.shutdown() worker.join(timeout=120) return LogResponse({}, result)
def test_duplicate_futures(self):
    # Issue 20367. Duplicate futures should not raise exceptions or give
    # duplicate responses.
    future1 = self.executor.submit(time.sleep, 2)
    completed = [f for f in futures.as_completed([future1, future1])]
    self.assertEqual(len(completed), 1)
def process_args(args): """Perform the actual processing according to the arguments""" # verbosity if args.verbose == 1: logging.basicConfig(stream=sys.stdout, level=logging.WARNING) elif args.verbose >= 2: logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) if args.blacklist: args.blacklist = load_blacklist(args.blacklist) # processing according to mutually exclusive options # read url list from input file if args.inputfile and args.feed is False and args.sitemap is False: inputdict = load_input_dict(args.inputfile, args.blacklist) url_processing_pipeline(args, inputdict) # fetch urls from a feed or a sitemap elif args.feed or args.sitemap: # load input URLs if args.inputfile: input_urls = load_input_urls(args.inputfile) elif args.feed: input_urls = [args.feed] elif args.sitemap: input_urls = [args.sitemap] # link discovery and storage inputdict = None with ThreadPoolExecutor(max_workers=args.parallel) as executor: if args.feed: future_to_url = { executor.submit(find_feed_urls, url): url for url in input_urls } elif args.sitemap: future_to_url = { executor.submit(sitemap_search, url, target_lang=args.target_language): url for url in input_urls } # process results one-by-one, i.e. in parallel for future in as_completed(future_to_url): if future.result() is not None: inputdict = convert_inputlist(args.blacklist, future.result(), args.url_filter, inputdict) url_processing_pipeline(args, inputdict) inputdict = None # read files from an input directory elif args.inputdir: file_processing_pipeline(args) # read from input directly else: # process input URL if args.URL: inputdict = convert_inputlist(args, [args.URL], None) url_processing_pipeline(args, inputdict) # process single url # process input on STDIN else: # file type and unicode check try: htmlstring = sys.stdin.read() except UnicodeDecodeError: sys.exit('ERROR: system, file type or buffer encoding') # process result = examine(htmlstring, args, url=args.URL) write_result(result, args)
import socket
from concurrent.futures import ThreadPoolExecutor

ss = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
addr = ('127.0.0.1', 9327)
ss.connect(addr)


def recv(ss):
    while 1:
        msg = ss.recv(65535)
        print(msg.decode())


def send(ss):
    while 1:
        msg = input('Enter a message: ')
        ss.send(msg.encode())


pools = ThreadPoolExecutor(max_workers=20)
from concurrent.futures import as_completed

function_list = [recv, send]
pools_list = list()
for i in function_list:
    pools_list.append(pools.submit(i, ss))
for i in as_completed(pools_list):
    i.result()
def main(args: argparse.Namespace):
    if args.pool_executor == 'process':
        executor = ProcessPoolExecutor(max_workers=args.max_workers)
    elif args.pool_executor == 'thread':
        executor = ThreadPoolExecutor(max_workers=args.max_workers)
    else:
        raise ValueError(args.pool_executor)

    lines = [line for line in tqdm(args.input)]

    with executor:
        try:
            futs_map = {
                executor.submit(proc_sample, args.url, line_text): (line_no, line_text)
                for line_no, line_text in tqdm(enumerate(lines, start=1), total=len(lines))
                if line_no >= args.begin_line and (
                    args.end_line <= 0 or line_no <= args.end_line)
            }
            prog_bar = tqdm(total=len(lines))
            prog_bar.update(args.begin_line - 1)
            for fut in as_completed(futs_map):
                line_no, line_text = futs_map[fut]
                try:
                    result = fut.result()
                except requests.HTTPError as err:
                    # http status code 5xx
                    if err.response.status_code in range(500, 600):
                        prog_bar.write('Tokenization error:\n'
                                       '  CoreNLP server internal error: {}\n'
                                       '  Offending sample: line {}'.format(err, line_no),
                                       file=sys.stderr)
                        if not args.ignore_5xx:
                            raise
                    # http status code 4xx
                    elif err.response.status_code in range(400, 500):
                        prog_bar.write('Tokenization error:\n'
                                       '  CoreNLP server received a bad request: {}\n'
                                       '  Offending sample: line {}'.format(err, line_no),
                                       file=sys.stderr)
                        if not args.ignore_4xx:
                            raise
                    else:
                        # other http error codes
                        prog_bar.write('Tokenization error, aborting:\n'
                                       '  CoreNLP server returned an error: {}\n'
                                       '  Offending sample: line {}\n'
                                       '{}'.format(err, line_no, line_text),
                                       file=sys.stderr)
                        raise
                except KeyboardInterrupt:
                    raise  # re-raise
                except Exception as err:
                    # other exceptions
                    prog_bar.write('Tokenization failed, aborting: {}\n'
                                   '  Offending sample: line {}\n'
                                   '{}'.format(err, line_no, line_text),
                                   file=sys.stderr)
                    raise
                else:
                    print(json.dumps(result, ensure_ascii=False), file=args.output)
                prog_bar.update()
        except KeyboardInterrupt:
            pass
def iter_documentation_builders(
    datasets: Optional[List[str]] = None,
    *,
    doc_util_paths: Optional[doc_utils.DocUtilPaths] = None,
) -> Iterator[BuilderDocumentation]:
    """Create dataset documentation string for given datasets.

    Args:
      datasets: list of datasets for which to create documentation. If None,
        then all available datasets will be used.
      doc_util_paths: Additional path for visualization, nightly info,...

    Yields:
      builder_documentation: The documentation information for each builder
    """
    print('Retrieving the list of builders...')
    datasets = datasets or _all_tfds_datasets()

    # pytype: disable=attribute-error
    if doc_util_paths.fig_base_path:
        visu_doc_util = doc_utils.VisualizationDocUtil(
            base_path=doc_util_paths.fig_base_path,
            base_url=doc_util_paths.fig_base_url,
        )
    else:
        visu_doc_util = None

    if doc_util_paths.df_base_path:
        df_doc_util = doc_utils.DataframeDocUtil(
            base_path=doc_util_paths.df_base_path,
            base_url=doc_util_paths.df_base_url,
        )
    else:
        df_doc_util = None

    if doc_util_paths.nightly_path:
        nightly_doc_util = doc_utils.NightlyDocUtil(
            path=doc_util_paths.nightly_path,
        )
    else:
        nightly_doc_util = None
    # pytype: enable=attribute-error

    document_single_builder_fn = functools.partial(
        _document_single_builder,
        visu_doc_util=visu_doc_util,
        df_doc_util=df_doc_util,
        nightly_doc_util=nightly_doc_util,
    )

    # Document all builders
    print(f'Document {len(datasets)} builders...')
    with futures.ThreadPoolExecutor(max_workers=_WORKER_COUNT_DATASETS) as tpool:
        tasks = [tpool.submit(document_single_builder_fn, name) for name in datasets]
        for future in tqdm.tqdm(futures.as_completed(tasks), total=len(tasks)):
            builder_doc = future.result()
            if builder_doc is None:  # Builder filtered
                continue
            else:
                tqdm.tqdm.write(f'Documentation generated for {builder_doc.name}...')
                yield builder_doc
    print('All builder documentations generated!')
def upload(
    api: BaiduPCSApi,
    from_to_list: List[FromTo],
    ondup: str = "overwrite",
    encrypt_key: Any = None,
    salt: Any = None,
    encrypt_type: EncryptType = EncryptType.No,
    max_workers: int = CPU_NUM,
    slice_size: int = DEFAULT_SLICE_SIZE,
    ignore_existing: bool = True,
    show_progress: bool = True,
):
    """Upload from_tos

    Args:
        max_workers (int): The number of concurrent workers.
        slice_size (int): The size of each slice when uploading in slices.
        ignore_existing (bool): Ignore localpaths whose remotepath already exists.
        show_progress (bool): Show uploading progress.
    """

    excepts = {}
    semaphore = Semaphore(max_workers)
    with _progress:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futs = {}
            for from_to in from_to_list:
                semaphore.acquire()
                task_id = None
                if show_progress:
                    task_id = _progress.add_task("upload", start=False, title=from_to.from_)

                fut = executor.submit(
                    sure_release,
                    semaphore,
                    upload_file,
                    api,
                    from_to,
                    ondup,
                    encrypt_key=encrypt_key,
                    salt=salt,
                    encrypt_type=encrypt_type,
                    slice_size=slice_size,
                    ignore_existing=ignore_existing,
                    task_id=task_id,
                )
                futs[fut] = from_to

            for fut in as_completed(futs):
                e = fut.exception()
                if e is not None:
                    from_to = futs[fut]
                    excepts[from_to] = e

    # Summary
    if excepts:
        table = Table(title="Upload Error", box=SIMPLE, show_edge=False)
        table.add_column("From", justify="left", overflow="fold")
        table.add_column("To", justify="left", overflow="fold")
        table.add_column("Error", justify="left")

        for from_to, e in sorted(excepts.items()):
            table.add_row(from_to.from_, from_to.to_, Text(str(e), style="red"))

        _progress.console.print(table)
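# Hypothetical sketch of the sure_release helper assumed above: it runs the
# wrapped callable and guarantees the semaphore slot is released afterwards,
# which is what keeps at most max_workers uploads queued at any time. The real
# helper may behave differently.
def sure_release(semaphore, func, *args, **kwargs):
    try:
        return func(*args, **kwargs)
    finally:
        semaphore.release()  # free the slot even if the upload raised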
def __init__(self, dirName, nEvent=-1, **kwargs):
    super(HEPCNNSplitDataset, self).__init__()
    syslogger = kwargs['syslogger'] if 'syslogger' in kwargs else None
    nWorkers = kwargs['nWorkers'] if 'nWorkers' in kwargs else 8
    if syslogger: syslogger.update(annotation='open file ' + dirName)
    self.dirName = dirName
    self.maxEventsList = [0, ]
    self.imagesList = []
    self.labelsList = []
    self.weightsList = []
    self.fileIdx = -1

    if syslogger: syslogger.update(annotation='read files')
    nEventsTotal = 0
    for fileName in sorted(listdir(self.dirName)):
        if not fileName.endswith('h5'):
            continue
        data = h5py.File(self.dirName + '/' + fileName, 'r')
        suffix = "_val" if 'images_val' in data['all_events'] else ""

        ## Keep the filename and image path only, and load them later with multiproc.
        images = (fileName, 'all_events/images' + suffix)
        #images = data['all_events/images'+suffix]
        labels = data['all_events/labels' + suffix]
        weights = data['all_events/weights' + suffix]

        if nEvent > 0:
            #images = images[:nEvent-nEventsTotal]  ## We'll do this step after (re)loading the images
            labels = labels[:nEvent - nEventsTotal]
            weights = weights[:nEvent - nEventsTotal]
        nEventsInFile = len(weights)
        nEventsTotal += nEventsInFile
        self.maxEventsList.append(nEventsTotal)

        labels = torch.Tensor(labels[()])
        weights = torch.Tensor(weights[()])
        ## We will do this step for images later

        self.imagesList.append(images)
        self.labelsList.append(labels)
        self.weightsList.append(weights)

        if nEvent > 0 and nEventsTotal >= nEvent:
            break

    if syslogger: syslogger.update(annotation='Convert images to Tensor')
    env_kmp = environ['KMP_AFFINITY'] if 'KMP_AFFINITY' in environ else None
    environ['KMP_AFFINITY'] = 'none'
    jobs = []
    with futures.ProcessPoolExecutor(max_workers=nWorkers) as pool:
        for fileIdx in range(len(self.maxEventsList) - 1):
            job = pool.submit(self.imageToTensor, fileIdx)
            jobs.append(job)
        for job in futures.as_completed(jobs):
            fileIdx, images = job.result()
            self.imagesList[fileIdx] = images
    if env_kmp != None:
        environ['KMP_AFFINITY'] = env_kmp

    for fileIdx in range(len(self.maxEventsList) - 1):
        #images = torch.Tensor(self.imagesList[fileIdx][()])
        images = self.imagesList[fileIdx]
        self.shape = images.shape
        if self.shape[-1] <= 5:
            ## actual format was NHWC. convert to pytorch native format, NCHW
            images = images.permute(0, 3, 1, 2)
            self.shape = images.shape
            if syslogger: syslogger.update(annotation="Convert image format")
        self.imagesList[fileIdx] = images
        self.channel, self.height, self.width = self.shape[1:]

        if nEvent > 0:
            images = images[:nEvent - nEventsTotal]
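# Hypothetical sketch of the imageToTensor worker submitted above: each worker
# process re-opens the HDF5 file recorded in imagesList, loads the image array,
# and returns it together with its file index so the parent can slot the Tensor
# back into place. h5py and torch are assumed to be imported by the surrounding
# module; the real implementation may differ.
def imageToTensor(self, fileIdx):
    fileName, imagePath = self.imagesList[fileIdx]
    with h5py.File(self.dirName + '/' + fileName, 'r') as data:
        images = torch.Tensor(data[imagePath][()])
    return fileIdx, images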
def _crack_http_auth(self, url, logfile, threads=20, exit_on_success=True):
    """ check for http auth type and crack login """
    futures = deque()
    headers = {'User-Agent': f"'{self.useragent}'"}
    s = requests.session()
    h = s.head(url, verify=False, headers=headers).headers
    auth_header = ''
    if 'WWW-Authenticate' in h:
        auth_header = h['WWW-Authenticate']

    def crack(s, url, h, a, u, p):
        code = s.head(url, verify=False, headers=h, auth=a(f'{u}', f'{p}')).status_code
        if code == 200:
            return f'Login found: {u}:{p}'
        return

    if 'Basic realm' in auth_header:
        auth_type = HTTPBasicAuth
    elif 'Digest realm' in auth_header:
        auth_type = HTTPDigestAuth
    else:
        # todo: proxy auth etc.
        return

    # single username + single password
    if self.opts['user'] and self.opts['pass']:
        us = self.opts['user']
        pw = self.opts['pass']
        r = s.head(url, headers=headers, verify=False, auth=auth_type(f'{us}', f'{pw}'))
        if r.status_code == 200:
            self._log(logfile, f'Login found: {us}:{pw}')
            if exit_on_success:
                return

    # single username + password list
    if self.opts['user'] and self.opts['plists']:
        us = self.opts['user']
        for pwlist in self.opts['plists']:
            pws = self._read_file(pwlist)
            with cf.ThreadPoolExecutor(threads) as exe:
                for pw in pws:
                    futures.append(
                        exe.submit(crack, s, url, headers, auth_type, us, pw))
                for r in cf.as_completed(futures):
                    if r.result():
                        self._log(logfile, f'{r.result()}')
                        if exit_on_success:
                            return
        futures = []

    # username list + password list
    if self.opts['ulists'] and self.opts['plists']:
        for uslist in self.opts['ulists']:
            for pwlist in self.opts['plists']:
                usrs = self._read_file(uslist)
                pws = self._read_file(pwlist)
                with cf.ThreadPoolExecutor(threads) as exe:
                    for us in usrs:
                        for pw in pws:
                            futures.append(
                                exe.submit(crack, s, url, headers, auth_type, us, pw))
                    for r in cf.as_completed(futures):
                        if r.result():
                            self._log(logfile, f'{r.result()}')
                            if exit_on_success:
                                return
    return
k = 0
for i in range(int(math.floor(a / 2))):
    k += 2
    ll.append(k)
if a % 2 != 0:
    ll.append(k + 1)

for k in range(len(chh)):
    print('Block%d : start!' % k)
    file_dir_Block = r'%s/Block%d' % (file_dir, k)
    if not os.path.exists(file_dir_Block):
        os.makedirs(file_dir_Block)
    data1 = data[data.chrom.isin(chh[k])]
    value = chh[k]
    with ProcessPoolExecutor(max_workers=len(chh[k])) as pool:
        futures = [pool.submit(test, data1, value, neighbor_region,
                               file_dir_Block, colname, i) for i in ll]
        for j in as_completed(futures):
            print(j.result())

elapsed = (time.clock() - start)
print("Time used: %d s" % round(elapsed, 4))
del data
del data1

####################################
# print("union neighbor methFeature: Start!")
meragefiledir = r'%s' % file_dir
filenames = os.listdir(meragefiledir)
def train(walker, lr_file, ckpt_dir, checkpoint, options):
    vocab_size = walker.walk_nodes_size
    # a rough formula for the number of steps per epoch in RWR
    num_steps_per_epoch = int(vocab_size * options.train_workers / options.batch_size)
    iter_epochs = options.iter_epoches
    # iter_epoches should be big enough to converge
    iter_steps = round(iter_epochs * num_steps_per_epoch)
    decay_epochs = options.decay_epochs
    decay_steps = round(decay_epochs * num_steps_per_epoch)
    ckpt_steps = round(options.ckpt_epochs * num_steps_per_epoch)
    initial_learning_rate = options.learning_rate
    decay_rate = options.decay_rate

    LR = utils.LearningRateGenerator(
        initial_learning_rate=initial_learning_rate,
        initial_steps=0,
        decay_rate=decay_rate,
        decay_steps=decay_steps,
        iter_steps=iter_steps)

    with tf.Graph().as_default(), tf.device('/gpu:0' if options.using_gpu else '/cpu:0'):
        global_step = tf.Variable(0, trainable=False, name="global_step")
        batch_single_size = options.walk_times * options.walk_length

        # inputs (center_nodes), labels (context_nodes), neg_labels (neg_nodes)
        inputs = tf.placeholder(tf.int32, shape=[options.batch_size],
                                name='inputs')  # center_nodes
        labels = tf.placeholder(tf.int32, shape=[options.batch_size, batch_single_size],
                                name='labels')  # context_nodes
        neg_labels = tf.placeholder(tf.int32, shape=[options.batch_size, options.negative],
                                    name='neg_labels')  # neg_nodes
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        model = SGNS(vocab_size=vocab_size,
                     embedding_size=options.embedding_size,
                     batch_size=options.batch_size,
                     batch_single_size=batch_single_size,
                     local_weight=options.local_weight,
                     global_weight=options.global_weight)

        train_op, loss = model.train(inputs, labels, neg_labels, global_step, learning_rate)

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=6)

        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init_op = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be
        # set to True to build towers on GPU, as some of the ops do not have
        # GPU implementations.
        config = tf.ConfigProto(
            allow_soft_placement=options.allow_soft_placement,
            log_device_placement=options.log_device_placement)
        config.gpu_options.per_process_gpu_memory_fraction = options.gpu_memory_fraction
        config.gpu_options.allow_growth = options.allow_growth
        # config.gpu_options.visible_device_list = visible_device_list

        with tf.Session(config=config) as sess:
            # first_step = 0
            if checkpoint == '0':  # new train
                sess.run(init_op)
            elif checkpoint == '-1':  # choose the latest one
                ckpt = tf.train.get_checkpoint_state(ckpt_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    # new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
                    # Restore from checkpoint
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # global_step_for_restore = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # first_step = int(global_step_for_restore) + 1
                else:
                    logger.warning('No checkpoint file found')
                    return
            else:
                if os.path.exists(os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint + '.index')):
                    # new_saver = tf.train.import_meta_graph(
                    #     os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint + '.meta'))
                    saver.restore(sess, os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint))
                    # first_step = int(checkpoint) + 1
                else:
                    logger.warning('checkpoint {} not found'.format(checkpoint))
                    return

            summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

            ## train
            executor_workers = options.train_workers - 1
            if executor_workers > 0:
                futures = set()
                executor = ThreadPoolExecutor(max_workers=executor_workers)
                for _ in range(executor_workers):
                    future = executor.submit(
                        _train_thread_body,
                        RWRGenerator(walker, options.batch_size, options.walk_times,
                                     options.walk_workers),
                        inputs, labels, neg_labels, sess, train_op, global_step,
                        learning_rate, LR)
                    logger.info("open a new training thread: %s" % future)
                    futures.add(future)

            last_loss_time = time.time() - options.loss_interval
            last_summary_time = time.time() - options.summary_interval
            last_decay_time = last_checkpoint_time = time.time()
            last_decay_step = last_summary_step = last_checkpoint_step = 0
            rwrgenerator = RWRGenerator(walker, options.batch_size, options.walk_times,
                                        options.walk_workers)
            while True:
                start_time = time.time()
                batch_inputs, batch_labels, batch_neg_labels = rwrgenerator.next_batch()
                feed_dict = {inputs: batch_inputs,
                             labels: batch_labels,
                             neg_labels: batch_neg_labels,
                             learning_rate: LR.learning_rate}
                _, loss_value, cur_step = sess.run([train_op, loss, global_step],
                                                   feed_dict=feed_dict)
                now = time.time()

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                epoch, epoch_step = divmod(cur_step, num_steps_per_epoch)

                if now - last_loss_time >= options.loss_interval:
                    format_str = '%s: step=%d(%d/%d), lr=%.6f, loss=%.6f, duration/step=%.4fs'
                    logger.info(format_str % (
                        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
                        cur_step, epoch_step, epoch, LR.learning_rate, loss_value,
                        now - start_time))
                    last_loss_time = time.time()

                if (now - last_summary_time >= options.summary_interval
                        or cur_step - last_summary_step >= options.summary_steps
                        or cur_step >= iter_steps):
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, cur_step)
                    last_summary_time = time.time()
                    last_summary_step = cur_step

                ckpted = False
                # Save the model checkpoint periodically
                # (named 'model.ckpt-global_step.meta').
                if (now - last_checkpoint_time >= options.ckpt_interval
                        or cur_step - last_checkpoint_step >= ckpt_steps
                        or cur_step >= iter_steps):
                    vecs, global_step_value = sess.run([model.vectors, global_step],
                                                       feed_dict=feed_dict)
                    # vecs, weights, biases = sess.run(
                    #     [model.vectors, model.context_weights, model.context_biases],
                    #     feed_dict=feed_dict)
                    checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                    utils.save_word2vec_format_and_ckpt(options.vectors_path, vecs,
                                                        checkpoint_path, sess, saver,
                                                        global_step_value)
                    # save_word2vec_format(vectors_path + ".contexts", weights, walker.idx_nodes)
                    # save_word2vec_format(vectors_path + ".context_biases",
                    #                      np.reshape(biases, [-1, 1]), walker.idx_nodes)
                    last_checkpoint_time = time.time()
                    last_checkpoint_step = global_step_value
                    ckpted = True

                # update learning rate
                if (ckpted or now - last_decay_time >= options.decay_interval
                        or (decay_steps > 0 and cur_step - last_decay_step >= decay_steps)):
                    lr_info = np.loadtxt(lr_file, dtype=float)
                    if np.abs(lr_info[1] - decay_epochs) > 1e-6:
                        decay_epochs = lr_info[1]
                        decay_steps = round(decay_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[2] - decay_rate) > 1e-6:
                        decay_rate = lr_info[2]
                    if np.abs(lr_info[3] - iter_epochs) > 1e-6:
                        iter_epochs = lr_info[3]
                        iter_steps = round(iter_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[0] - initial_learning_rate) > 1e-6:
                        initial_learning_rate = lr_info[0]
                        LR.reset(initial_learning_rate=initial_learning_rate,
                                 initial_steps=cur_step,
                                 decay_rate=decay_rate,
                                 decay_steps=decay_steps,
                                 iter_steps=iter_steps)
                    else:
                        LR.exponential_decay(cur_step,
                                             decay_rate=decay_rate,
                                             decay_steps=decay_steps,
                                             iter_steps=iter_steps)
                    last_decay_time = time.time()
                    last_decay_step = cur_step

                if cur_step >= LR.iter_steps:
                    break

            summary_writer.close()

            if executor_workers > 0:
                logger.info("waiting the training threads finished:")
                try:
                    for future in as_completed(futures):
                        logger.info(future)
                except KeyboardInterrupt:
                    print("stopped by hand.")
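# Hypothetical sketch of the _train_thread_body helper assumed above: each
# extra training thread pulls batches from its own RWRGenerator and runs
# train_op against the shared session until the global step passes
# LR.iter_steps. The real implementation may differ.
def _train_thread_body(rwrgenerator, inputs, labels, neg_labels, sess,
                       train_op, global_step, learning_rate, LR):
    while True:
        batch_inputs, batch_labels, batch_neg_labels = rwrgenerator.next_batch()
        feed_dict = {inputs: batch_inputs,
                     labels: batch_labels,
                     neg_labels: batch_neg_labels,
                     learning_rate: LR.learning_rate}
        _, cur_step = sess.run([train_op, global_step], feed_dict=feed_dict)
        if cur_step >= LR.iter_steps:
            break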
    print(f'========= stderr of {binary}:')
    print(result[2])


if __name__ == "__main__":
    clean_binary_tests()
    build_tests()
    binaries = test_binaries(exclude=[r'test_regression-.*', r'near_rpc_error_macro-.*'])
    print(f'========= collected {len(binaries)} test binaries:')
    print('\n'.join(binaries))

    completed = 0
    fails = []
    with ThreadPoolExecutor(max_workers=workers()) as executor:
        future_to_binary = {executor.submit(run_test, binary): binary for binary in binaries}
        for future in as_completed(future_to_binary):
            completed += 1
            binary_full_name = future_to_binary[future]
            binary = os.path.basename(binary_full_name)
            result = future.result()
            if result[0] != 0:
                fails.append((binary_full_name, result))
            else:
                show_test_result(binary, result)

    print(f"========= finished run {completed} test binaries")
    if fails:
        if len(fails) <= RERUN_THRESHOLD:
            # if not many failures, rerun the failed tests sequentially to avoid potential timeouts
            new_fails = []
            for f in fails:
def fetch_graph_and_labels(parameters, graph_config):
    decision_task_id = find_decision_task(parameters, graph_config)

    # First grab the graph and labels generated during the initial decision task
    full_task_graph = get_artifact(decision_task_id, "public/full-task-graph.json")
    logger.info("Load taskgraph from JSON.")
    _, full_task_graph = TaskGraph.from_json(full_task_graph)
    label_to_taskid = get_artifact(decision_task_id, "public/label-to-taskid.json")

    logger.info("Fetching additional tasks from action and cron tasks.")
    # fetch everything in parallel; this avoids serializing any delay in downloading
    # each artifact (such as waiting for the artifact to be mirrored locally)
    with futures.ThreadPoolExecutor(CONCURRENCY) as e:
        fetches = []

        # fetch any modifications made by action tasks and swap out new tasks
        # for old ones
        def fetch_action(task_id):
            logger.info(
                "fetching label-to-taskid.json for action task {}".format(task_id))
            try:
                run_label_to_id = get_artifact(task_id, "public/label-to-taskid.json")
                label_to_taskid.update(run_label_to_id)
            except HTTPError as e:
                if e.response.status_code != 404:
                    raise
                logger.debug("No label-to-taskid.json found for {}: {}".format(task_id, e))

        head_rev_param = "{}head_rev".format(graph_config["project-repo-param-prefix"])

        namespace = "{}.v2.{}.revision.{}.taskgraph.actions".format(
            graph_config["trust-domain"],
            parameters["project"],
            parameters[head_rev_param],
        )
        for task_id in list_tasks(namespace):
            fetches.append(e.submit(fetch_action, task_id))

        # Similarly for cron tasks..
        def fetch_cron(task_id):
            logger.info(
                "fetching label-to-taskid.json for cron task {}".format(task_id))
            try:
                run_label_to_id = get_artifact(task_id, "public/label-to-taskid.json")
                label_to_taskid.update(run_label_to_id)
            except HTTPError as e:
                if e.response.status_code != 404:
                    raise
                logger.debug("No label-to-taskid.json found for {}: {}".format(task_id, e))

        namespace = "{}.v2.{}.revision.{}.cron".format(
            graph_config["trust-domain"],
            parameters["project"],
            parameters[head_rev_param],
        )
        for task_id in list_tasks(namespace):
            fetches.append(e.submit(fetch_cron, task_id))

        # now wait for each fetch to complete, raising an exception if there
        # were any issues
        for f in futures.as_completed(fetches):
            f.result()

    return (decision_task_id, full_task_graph, label_to_taskid)
                                      schedule_type='exponential')
    sys.stderr.write("lr={}\n".format(lr))

    with ThreadPoolExecutor(max_workers=num_jobs) as executor:
        job_pool = []
        sys.stderr.write("Num jobs = {}\n".format(num_jobs))
        sys.stderr.flush()
        for job_id in range(1, num_jobs + 1):
            frame_shift = num_archives_processed % args.frame_subsampling_factor
            p = executor.submit(run_job, num_jobs, job_id, dirname, iter_no,
                                model_file, lr, frame_shift, egs_dir,
                                num_archives, num_archives_processed,
                                "16,8", cuda_cmd)
            num_archives_processed += 1
            job_pool.append(p)
        for p in as_completed(job_pool):
            if p.result() != 0:
                quit(p.result())

    model_list = [
        os.path.join(dirname, "{}.{}.pt".format(iter_no, job_id))
        for job_id in range(1, num_jobs + 1)
    ]
    process_out = subprocess.run([
        *cuda_cmd.split(),
        "{}/log/merge.{}.log".format(dirname, iter_no + 1),
        model_file,
        "--dir", dirname,
        "--mode", "merge",
        "--new-model", os.path.join(dirname, "{}.pt".format(iter_no + 1)),
        ",".join(model_list)
    ])
    if process_out.returncode != 0:
        quit(process_out.returncode)
urllib.request.urlopen(request, timeout=args.timeout, context=context)
start_time = time.time()
success_login = False
if len(password) > 1:
    log.debug("total data in wordlist: " + str(len(password)) + " words")
log.info("starting a login brute force")
with ThreadPoolExecutor(max_workers=args.thread) as executor:
    processed = (executor.submit(login, args.url, args.usr, pwd, args.timeout,
                                 args.proxy) for pwd in password)
    for i, process in enumerate(as_completed(processed)):
        if len(password) > 1:
            print("[{}][INFO] testing {} password".format(
                datetime.now().strftime("%H:%M:%S"), i), end="\r")
        process = process.result()
        if process is not False:
            success_login = True
            password = process
            break
if success_login is True:
    log.success("successfully entered into the target dashboard with username \"" +
                args.usr + "\" and password \"" + password + "\"")
def run_python_tests(self,
                     tests=None,
                     test_objects=None,
                     subsuite=None,
                     verbose=False,
                     jobs=None,
                     exitfirst=False,
                     extra=None,
                     **kwargs):
    self.activate_virtualenv()
    if test_objects is None:
        from moztest.resolve import TestResolver
        resolver = self._spawn(TestResolver)
        # If we were given test paths, try to find tests matching them.
        test_objects = resolver.resolve_tests(paths=tests, flavor='python')
    else:
        # We've received test_objects from |mach test|. We need to ignore
        # the subsuite because python-tests don't use this key like other
        # harnesses do and |mach test| doesn't realize this.
        subsuite = None

    mp = TestManifest()
    mp.tests.extend(test_objects)

    filters = []
    if subsuite == 'default':
        filters.append(mpf.subsuite(None))
    elif subsuite:
        filters.append(mpf.subsuite(subsuite))

    tests = mp.active_tests(filters=filters,
                            disabled=False,
                            python=self.virtualenv_manager.version_info[0],
                            **mozinfo.info)

    if not tests:
        submsg = "for subsuite '{}' ".format(subsuite) if subsuite else ""
        message = "TEST-UNEXPECTED-FAIL | No tests collected " + \
                  "{}(Not in PYTHON_UNITTEST_MANIFESTS?)".format(submsg)
        self.log(logging.WARN, 'python-test', {}, message)
        return 1

    parallel = []
    sequential = []
    os.environ.setdefault('PYTEST_ADDOPTS', '')

    if extra:
        os.environ['PYTEST_ADDOPTS'] += " " + " ".join(extra)

    if exitfirst:
        sequential = tests
        os.environ['PYTEST_ADDOPTS'] += " -x"
    else:
        for test in tests:
            if test.get('sequential'):
                sequential.append(test)
            else:
                parallel.append(test)

    self.jobs = jobs or cpu_count()
    self.terminate = False
    self.verbose = verbose

    return_code = 0

    def on_test_finished(result):
        output, ret, test_path = result

        for line in output:
            self.log(logging.INFO, 'python-test', {'line': line.rstrip()},
                     '{line}')

        if ret and not return_code:
            self.log(logging.ERROR, 'python-test', {
                'test_path': test_path,
                'ret': ret
            }, 'Setting retcode to {ret} from {test_path}')
        return return_code or ret

    with ThreadPoolExecutor(max_workers=self.jobs) as executor:
        futures = [executor.submit(self._run_python_test, test)
                   for test in parallel]

        try:
            for future in as_completed(futures):
                return_code = on_test_finished(future.result())
        except KeyboardInterrupt:
            # Hack to force stop currently running threads.
            # https://gist.github.com/clchiou/f2608cbe54403edb0b13
            executor._threads.clear()
            thread._threads_queues.clear()
            raise

    for test in sequential:
        return_code = on_test_finished(self._run_python_test(test))
        if return_code and exitfirst:
            break

    self.log(logging.INFO, 'python-test', {'return_code': return_code},
             'Return code from mach python-test: {return_code}')
    return return_code
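# Generic illustration of the pattern shared by the snippets above, not taken
# from any one of them: submit work to an executor, map each Future back to its
# input, and consume results (or exceptions) in completion order. All names in
# this sketch are hypothetical.
from concurrent.futures import ThreadPoolExecutor, as_completed


def work(item):
    # stand-in for the real task; returns its input doubled
    return item * 2


def run_all(items, max_workers=4):
    results = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_item = {executor.submit(work, item): item for item in items}
        for future in as_completed(future_to_item):
            item = future_to_item[future]
            try:
                results[item] = future.result()
            except Exception as exc:  # record failures instead of aborting the batch
                results[item] = exc
    return results


if __name__ == '__main__':
    print(run_all(range(5)))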