def run_trials():
    """Run all trials on a thread pool and persist the collected histories.

    Maps ``f`` over ``numTrials`` copies of ``gens`` on a 50-thread pool, then
    maps ``cloud_result`` over whatever ``f`` returned.  Every result is
    unpacked into one row of two ``(numTrials, gens + 1)`` float arrays.  After
    the user presses Enter, the pair of arrays is stored in
    ``soda_results.durus`` under the current Unix time (as a string key).
    """
    from multiprocessing.pool import ThreadPool as Pool

    numTrials = 3000
    gens = 1000

    pool = Pool(50)
    try:
        # presumably f() submits a job and returns its id -- verify against f
        jids = pool.map(f, [gens] * numTrials)
        print("Done spawning trials. Retrieving results...")
        results = pool.map(cloud_result, jids)
    finally:
        # Both maps are synchronous, so nothing is pending on success; on
        # failure terminate() drops queued work instead of leaking 50 threads.
        pool.terminate()
        pool.join()

    firstLocusFreqsHists = zeros((numTrials, gens + 1), dtype='float')
    lastLocusFreqsHists = zeros((numTrials, gens + 1), dtype='float')
    print("Done retrieving results. Press Enter to serialize...")
    raw_input()
    for i, result in enumerate(results):
        firstLocusFreqsHists[i, :], lastLocusFreqsHists[i, :] = result

    with closing(FileStorage("soda_results.durus")) as durus:
        conn = Connection(durus)
        key = str(int(floor(time.time())))
        conn.get_root()[key] = (firstLocusFreqsHists, lastLocusFreqsHists)
        conn.commit()
def demo(args):
    """Exercise the logging helpers from the command line.

    Parses ``--verbose/-v`` and ``--quiet/-q`` counters from *args*, switches
    to the resulting log level, emits one message through each level helper,
    restores the previous level, and finally logs eight messages from a
    thread pool.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", "-v", action='count',
                        default=ENV_VERBOSITY)
    parser.add_argument("--quiet", "-q", action='count', default=0)
    parsed = parser.parse_args(args)

    level = verbosity_to_level(parsed.verbose - parsed.quiet)
    info("new log level: " + str(level))
    previous_level = set_loglevel(level)
    info("old level was: " + str(previous_level))

    info("printing some messages with different log levels")
    samples = (
        (spam, "rofl"),
        (dbg, "wtf?"),
        (info, "foo"),
        (warn, "WARNING!!!!"),
        (err, "that didn't go so well"),
        (crit, "pretty critical, huh?"),
    )
    for emit, text in samples:
        emit(text)

    info("restoring old loglevel")
    set_loglevel(previous_level)
    info("old loglevel restored")

    info("running some threaded stuff")
    workers = ThreadPool()
    for number in range(8):
        workers.apply_async(info, ("async message #" + str(number),))
    workers.close()
    workers.join()
def update(args=None):
    """Update every project that is behind, optionally in parallel.

    Projects come from ``list_projects(False, args.dir)``.  When ``args.j`` is
    truthy the work is spread over ``Pool(args.j)``; otherwise projects are
    processed one after another.

    :param args: object exposing ``dir`` and ``j``.  The ``None`` default is
        kept for interface compatibility, but ``args.dir`` is read
        unconditionally, so a real object must be passed.
    :raises: whatever ``is_behind()`` / ``update()`` raise.  In parallel mode
        every project is still attempted and the first failure is re-raised
        after the pool has drained.
    """
    projects = list_projects(False, args.dir)

    def update_if_behind(project):
        # Shared by both modes so they cannot drift apart.
        if project.is_behind():
            project.update()
            print("{} updated".format(project.name))

    print("Update in progress...")
    if args.j:
        pool = Pool(args.j)
        pending = [pool.apply_async(update_if_behind, (project,))
                   for project in projects]
        pool.close()
        pool.join()
        # apply_async() stores worker exceptions on the result object; without
        # get() a failed update would vanish silently.
        for result in pending:
            result.get()
    else:
        for project in projects:
            update_if_behind(project)
    print("Update done")
def dowload_person(person_url):
    """Map ``download_pic`` over one person's picture URLs on eight threads.

    The URLs are whatever ``get_person_pic_url_Set(person_url)`` returns.
    """
    print('start to downlaod person %s\n' % (person_url))
    picture_urls = get_person_pic_url_Set(person_url)

    workers = ThreadPool(8)
    workers.map(download_pic, picture_urls)
    # map() has already waited for the downloads; just release the threads.
    workers.close()
    workers.join()
def local_job_runner(cmds_list, num_threads, throw_error=True):
    """Execute shell commands locally on a thread pool and report failures.

    All commands are run (at most ``num_threads`` at a time) and the call
    returns only after every one of them has finished.

    Parameters:
      cmds_list - cmds that will be executed in the ThreadPool
      num_threads - number of threads used by the ThreadPool
      throw_error - if True, raise RuntimeError when any cmd failed;
                    if False, return the list of cmds that failed.

    Returns:
      list of failed cmds (empty when everything succeeded).

    Raises:
      RuntimeError - when ``throw_error`` is True and at least one cmd failed.
    """
    cmds_list = list(cmds_list)

    def run_cmd_in_shell(cmd):
        # backticks() is indexed below as (output, return code, ...).  A
        # CalledProcessError is folded into that same shape so one bad command
        # is reported as a failure instead of aborting the whole batch.
        try:
            return backticks(cmd, merge_stderr=True)
        except subprocess.CalledProcessError as exc:
            return (exc.output, exc.returncode)

    pool = ThreadPool(processes=num_threads)
    try:
        rets = pool.map(run_cmd_in_shell, cmds_list)
    finally:
        pool.close()
        pool.join()

    failures = [(cmd, ret[0]) for cmd, ret in zip(cmds_list, rets)
                if ret[1] != 0]
    if throw_error and failures:
        raise RuntimeError("\n".join(
            "CMD failed: %s, %s" % (cmd, out) for cmd, out in failures))
    return [cmd for cmd, _ in failures]
def main(dir_path, outfile_path, is_journal=True):
    """Map/reduce over the entries of ``dir_path`` using 20 threads.

    The directory listing is cut into 20 contiguous ``[beg, end)`` slices.
    Each slice is handed to ``job_map`` as
    ``[dir_path, is_journal, beg, end, slice_index, ret]``, where ``ret`` is
    one dict shared by all workers.  After all workers finished,
    ``job_reduce(ret, outfile_path)`` is called.
    """
    pn = 20
    flst = os.listdir(dir_path)
    total = len(flst)
    ret = dict()

    arglst = []
    for i in range(pn):
        beg = int(math.ceil(float(total) / pn * i))
        end = int(math.ceil(float(total) / pn * (i + 1)))
        # Pin the outer boundaries so float rounding can never move them.
        # (The original compared the builtin ``id`` here, so this never ran.)
        if i == 0:
            beg = 0
        if i == pn - 1:
            end = total
        arglst.append([dir_path, is_journal, beg, end, i, ret])

    pool = ThreadPool(pn)
    try:
        pool.map(job_map, arglst)
    finally:
        pool.close()
        pool.join()

    print(80 * '=')
    print('[acmdl]: map finished')
    print(80 * '=')
    job_reduce(ret, outfile_path)
    print(80 * '=')
    print('[acmdl]: reduce finished')
    print(80 * '=')
    return
def getMessagesBySource(self, source, batch_mode=False):
    """
    Returns the messages for the given source: the static checks run on the
    source content split into lines, plus the builder messages when
    ``self._isBuilderCallable()`` is true.

    With ``self._USE_THREADS`` set both checks are submitted to a thread pool
    and builder records are placed before the static ones; otherwise the
    checks run in sequence and static records come first. The cache is saved
    before returning.
    """
    self._setupEnvIfNeeded()

    if not self._USE_THREADS:
        records = getStaticMessages(source.getSourceContent().split('\n'))
        if self._isBuilderCallable():
            records += self._getBuilderMessages(source, batch_mode)
    else:
        records = []
        workers = ThreadPool()
        pending_static = workers.apply_async(
            getStaticMessages,
            args=(source.getSourceContent().split('\n'), ))

        if self._isBuilderCallable():
            pending_builder = workers.apply_async(
                self._getBuilderMessages, args=[source, batch_mode])
            records += pending_builder.get()

        records += pending_static.get()

        workers.terminate()
        workers.join()

    self._saveCache()
    return records
def handle_noargs(self, **options):
    """Run the two pooled passes of the command.

    First ``self.sync_file`` is scheduled for every file that
    ``self.walk_tree`` reports under ``conf.SIMPLESTATIC_DIR``; then
    ``self.handle_template`` is scheduled for every file under
    ``settings.TEMPLATE_DIRS``.  Each pass uses its own 20-thread pool and is
    fully drained before the next message is printed.
    """
    mimetypes.init()

    def walk_in_pool(roots, task):
        # Schedule task(base, filename) for each walked file and wait for all.
        workers = ThreadPool(20)

        def submit(base, filename):
            workers.apply_async(task, args=[base, filename])

        self.walk_tree(roots, submit)
        workers.close()
        workers.join()

    locked_print("===> Syncing static directory")
    # Sync every file in the static media dir with S3
    walk_in_pool([conf.SIMPLESTATIC_DIR], self.sync_file)
    locked_print("===> Static directory syncing complete")

    locked_print("===> Compressing and uploading CSS and JS")
    # Iterate over every template, looking for SimpleStaticNode
    walk_in_pool(list(settings.TEMPLATE_DIRS), self.handle_template)
    locked_print("===> Finished compressing and uploading CSS and JS")
def get_for_genres(genres):
    """Collect playlists for *genres* over five result pages.

    For each page, ``parse_page`` is run on a ``(genre, page)`` pair per
    remaining genre.  Every result contributes its playlists and its newly
    found genres; a genre whose page came back without playlists is dropped
    from later pages.

    Returns ``(playlists, new_genres)``; if anything raises while a page is
    processed, the error is printed and ``(playlists, [])`` is returned.
    """
    genres = set(genres)
    playlists = {}
    new_genres = set()

    for page in xrange(5):
        args = [(g, page) for g in genres]
        try:
            pool = ThreadPool(PROCESSES)
            finished = 0
            for genre, _, pl, found in pool.imap_unordered(parse_page, args):
                finished += 1
                print("%d/%d" % (finished, len(args)))
                playlists.update(pl)
                new_genres |= found
                if not pl:
                    genres.remove(genre)
        except Exception as e:
            print(e)
            return playlists, []
        finally:
            pool.terminate()
            pool.join()

    return playlists, new_genres
def _power_off_and_delete_all_vm_resources(self, api, reservation_details):
    """Handle every VM resource of the reservation in parallel, then bulk delete.

    For each resource whose details carry ``VmDetails``,
    ``self._power_off_or_delete_deployed_app`` is scheduled on a thread pool
    with a shared lock and a shared ``message_status`` dict.  Every non-None
    value returned by those calls is collected and passed to
    ``api.DeleteResources`` in a single call (skipped when nothing was
    collected).
    """
    shared_lock = Lock()
    message_status = {"power_off": False, "delete": False}
    workers = ThreadPool()
    pending = []

    for resource in reservation_details.ReservationDescription.Resources:
        details = api.GetResourceDetails(resource.Name)
        if not details.VmDetails:
            continue
        pending.append(workers.apply_async(
            self._power_off_or_delete_deployed_app,
            (api, details, shared_lock, message_status)))

    workers.close()
    workers.join()

    outcomes = [handle.get() for handle in pending]
    resource_to_delete = [item for item in outcomes if item is not None]

    # delete resource - bulk
    if resource_to_delete:
        api.DeleteResources(resource_to_delete)
def read(self, sftppath, localPath = None, numParallelConnections = 1):
    """Recursively download ``sftppath`` (a file or a directory) over SFTP.

    :param sftppath: remote path to fetch.
    :param localPath: local directory to download into; defaults to the
        current working directory.
    :param numParallelConnections: when greater than 1, each file is fetched
        by a pool worker over its own ``SFTPConnection``; otherwise files are
        fetched one by one over this object's transport.
    :raises: errors from the directory walk immediately; in parallel mode the
        first failed file download is re-raised after all transfers ended.
    """
    # Remote SFTP paths always use '/', whatever the local OS separator is.
    import posixpath

    if localPath is None:
        localPath = os.getcwd()  # local path - can be changed later

    parallel = numParallelConnections > 1
    sftp = paramiko.SFTPClient.from_transport(self.transport)
    pool = ThreadPool(numParallelConnections) if parallel else None
    pending = []

    def getFile(remotePath, localFile):
        # One dedicated connection per file; always torn down again.
        pconnection = SFTPConnection(self.connectionInfo)
        pconnection.connect()
        try:
            psftp = paramiko.SFTPClient.from_transport(pconnection.transport)
            try:
                psftp.get(remotePath, localFile)
            finally:
                psftp.close()
        finally:
            pconnection.close()

    def recursiveRead(remotePath, localDir):
        target = os.path.join(localDir, posixpath.basename(remotePath))
        if not stat.S_ISDIR(sftp.lstat(remotePath).st_mode):
            # it is a file
            if parallel:
                pending.append(
                    pool.apply_async(getFile, args=(remotePath, target)))
            else:
                sftp.get(remotePath, target)
            return
        # it is a directory: tolerate it already existing locally
        try:
            os.makedirs(target)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise
        for entry in sftp.listdir_attr(remotePath):
            recursiveRead(posixpath.join(remotePath, entry.filename), target)

    try:
        recursiveRead(sftppath, localPath)
    finally:
        sftp.close()
        if pool is not None:
            pool.close()
            pool.join()

    # apply_async() keeps worker exceptions on the result; surface them so a
    # partially failed download is not reported as success.
    for result in pending:
        result.get()
def _run_tests(self):
    """Run every configured test without producing a report.

    Tests matching ``self._configured_run_alone_tests`` are held back; all
    others are handed to ``self._spawn`` together with the thread pool.  Once
    the pool has drained, the held-back tests are run one at a time through
    ``self._run_one``.

    On KeyboardInterrupt ``self._reap_all()`` is called; a second interrupt
    during that wait terminates the pool and propagates.  Any other exception
    terminates the pool and propagates.
    """
    standalone = []
    tests = self._tests
    pool = ThreadPool(self._worker_count)
    try:
        for cmd, options in tests:
            options = options or {}
            if not matches(self._configured_run_alone_tests, cmd):
                self._spawn(pool, cmd, options)
                continue
            standalone.append((cmd, options))

        pool.close()
        pool.join()

        if standalone:
            util.log("Running tests marked standalone")
            for cmd, options in standalone:
                self._run_one(cmd, **options)
    except KeyboardInterrupt:
        try:
            util.log('Waiting for currently running to finish...')
            self._reap_all()
        except KeyboardInterrupt:
            pool.terminate()
            raise
    except:
        pool.terminate()
        raise
def bench_compression_comparison(n_chunks, df_length, append_mul, pool_size, pool_step, repeats, use_raw_lz4, use_HC):
    """Benchmark single-threaded against pooled compression and print results.

    A payload built by ``construct_test_data`` is repeated ``n_chunks`` times.
    It is measured once with ``bench_single``, then with ``bench_multi`` for
    every pool size in ``range(2, pool_size + 1, pool_step)``.  With
    ``use_raw_lz4`` a ThreadPool of that size is passed to ``bench_multi``;
    otherwise ``c.set_compression_pool_size`` is called and no pool is passed.
    """
    payload = construct_test_data(df_length, append_mul)
    chunk_size = len(payload) / (1024 ** 2.0)
    chunks = [payload] * n_chunks

    # Single threaded
    # ---------------
    baseline = bench_single(repeats, chunks, use_HC)
    print_results(1, chunk_size, n_chunks, chunk_size * n_chunks, baseline)
    single_mean = np.mean(baseline)

    # Multi-threaded
    # --------------
    for workers in range(2, pool_size + 1, pool_step):
        pool = None
        if use_raw_lz4:
            pool = ThreadPool(workers)
        else:
            c.set_compression_pool_size(workers)

        timings = bench_multi(repeats, chunks, use_HC, pool=pool)
        print_results(workers, chunk_size, n_chunks, chunk_size * n_chunks,
                      timings, compare=single_mean)

        if pool:
            pool.close()
            pool.join()
    print("")
def thread(host, port, threads, num):
    """Schedule ``job(host, port)`` *num* times on a pool of *threads* workers.

    Submissions are spaced one millisecond apart; the call returns after all
    scheduled jobs have completed.
    """
    workers = ThreadPool(threads)
    target = (host, port)
    for _ in range(num):
        workers.apply_async(job, target)
        time.sleep(0.001)
    workers.close()
    workers.join()
def check_artifact_cache(self, vts):
    """Checks the artifact cache for the specified VersionedTargetSets.

    Returns a pair ``(cached, uncached)``: the vts whose files were taken
    from the cache (these don't require building, and ``update()`` has been
    called on them) and the remaining ones, in their original order.  The
    cache is consulted only when one is configured and the
    ``read_from_artifact_cache`` option is set.
    """
    if not vts:
        return [], []

    hits = []
    misses = OrderedSet(vts)
    log = self.context.log

    if self._artifact_cache and self.context.options.read_from_artifact_cache:
        def use_cached(vt):
            return self._artifact_cache.use_cached_files(vt.cache_key)

        pool = ThreadPool(processes=6)
        lookups = pool.map(use_cached, vts, chunksize=1)
        pool.close()
        pool.join()

        for vt, was_in_cache in zip(vts, lookups):
            if not was_in_cache:
                log.info('No cached artifacts for %s' % vt.targets)
                continue
            hits.append(vt)
            misses.discard(vt)
            log.info('Using cached artifacts for %s' % vt.targets)
            vt.update()

    return hits, list(misses)
def run(self, suites):
    """Run the given suites on a thread pool and return the shared result.

    Plugins may replace the suites (``prepareTest``) and the output stream
    (``setOutputStream``).  Every suite is invoked with the single result
    object from ``self._makeResult()``.  The pool size comes from the
    ``thread_pool`` option; a negative value means ``cpu_count()``.
    """
    plugins = self.config.plugins

    replacement = plugins.prepareTest(suites)
    if replacement is not None:
        suites = replacement

    stream = plugins.setOutputStream(self.stream)
    if stream is not None:
        self.stream = stream

    result = self._makeResult()

    workers = self.config.options.thread_pool
    if workers < 0:
        workers = cpu_count()
    pool = ThreadPool(workers)

    # The timed section covers scheduling and waiting for all suites.
    with measure_time(result):
        for suite in suites:
            pool.apply_async(suite, args=(result,))
        pool.close()
        pool.join()

    self.config.plugins.finalize(result)
    return result
def downloadPDFs(self):
    """Download all the files listed in the metadata CSV.

    Reads ``self.csvpath`` and schedules ``self.saveFile(pmcid)`` on a shared
    30-worker pool for every row whose ninth column contains ``'PMC'``.  The
    first row that does not look like a PMC id stops the run via
    ``sys.exit(0)``; downloads that were already scheduled are still allowed
    to finish before the process exits.
    """
    startTime = time.strftime("%c")

    # One pool for the whole run: creating and joining a pool per row (as
    # before) forced every download to finish before the next one started.
    pool = Pool(30)
    try:
        # Loop through the CSV
        with open(self.csvpath) as f:
            metadata = csv.reader(f, quotechar='"', delimiter=',',
                                  quoting=csv.QUOTE_ALL,
                                  skipinitialspace=True)
            for row in metadata:
                pmcid = row[8]
                # Check the input is a PMC ID
                if 'PMC' in pmcid:
                    print('Starting thread for: ' + pmcid)
                    pool.apply_async(self.saveFile, (pmcid,))
                else:
                    print('Something is wrong. ' + pmcid + ' is not a PMC id')
                    # NOTE(review): exit status 0 on an error path is kept for
                    # compatibility -- confirm whether callers expect non-zero.
                    sys.exit(0)
    finally:
        pool.close()
        pool.join()

    print('Finished downloading all files: start {} end {}.'.format(
        startTime, time.strftime("%c")))
def ons_resolver(key):
    """Fetch the profile ``'u/' + key`` from every ONS server and cross-check.

    All servers in ``ONS_SERVERS`` are queried in parallel.  Replies are
    compared by the MD5 of their canonical (key-sorted) JSON form.  If the
    most common reply was given by at least ``SERVER_CONFIRMATION_PERCENTAGE``
    percent of the servers, that reply is returned; otherwise an error reply
    is returned.  A server that cannot be queried contributes an error reply.
    """
    def check_server(server):
        try:
            namecoind = NamecoindServer(server, NAMECOIND_PORT,
                                        NAMECOIND_USER, NAMECOIND_PASSWD)
            return namecoind.get_full_profile('u/' + key)
        except Exception:
            return error_reply("Couldn't connect to namecoind")

    pool = ThreadPool(len(ONS_SERVERS))
    try:
        replies = pool.map(check_server, ONS_SERVERS)
    finally:
        pool.close()
        pool.join()

    # sort_keys makes equal dicts serialize identically; encode() gives md5
    # the bytes it requires.
    data_hashes = [
        hashlib.md5(
            json.dumps(reply, sort_keys=True).encode('utf-8')).hexdigest()
        for reply in replies
    ]

    winning_hash, max_repeated_times = Counter(data_hashes).most_common(1)[0]
    required = (SERVER_CONFIRMATION_PERCENTAGE / 100.0) * len(ONS_SERVERS)

    if max_repeated_times >= required:
        # Return the reply that actually reached consensus; replies[0] may be
        # the outlier.
        return replies[data_hashes.index(winning_hash)]
    return error_reply("Data from different ONS servers doens't match")
def run_tidy(sha="HEAD", is_rev_range=False):
    """Run clang-tidy-diff on every path changed by a commit or revision range.

    :param sha: revision (or range) handed to git.
    :param is_rev_range: use ``git diff`` instead of ``git show`` when True.
    :return: the tool output of all changed paths, concatenated in path order.

    A KeyboardInterrupt while waiting for results exits with status 1.
    """
    diff_cmdline = ["git", "diff" if is_rev_range else "show", sha]

    # Figure out which paths changed in the given diff.
    changed_paths = subprocess.check_output(
        diff_cmdline + ["--name-only", "--pretty=format:"]).splitlines()
    changed_paths = [p for p in changed_paths if p]

    # Produce a separate diff for each file and run clang-tidy-diff on it
    # in parallel.
    def tidy_on_path(path):
        # Context managers close both the temp file and the re-opened read
        # handle deterministically instead of leaving them to the GC.
        with tempfile.NamedTemporaryFile() as patch_file:
            cmd = diff_cmdline + [
                "--src-prefix=%s/" % ROOT,
                "--dst-prefix=%s/" % ROOT,
                "--",
                path]
            subprocess.check_call(cmd, stdout=patch_file, cwd=ROOT)
            cmdline = [CLANG_TIDY_DIFF,
                       "-clang-tidy-binary", CLANG_TIDY,
                       "-p0",
                       "--",
                       "-DCLANG_TIDY"] + compile_flags.get_flags()
            with open(patch_file.name) as patch_in:
                return subprocess.check_output(
                    cmdline, stdin=patch_in, cwd=ROOT)

    pool = ThreadPool(multiprocessing.cpu_count())
    try:
        return "".join(pool.imap(tidy_on_path, changed_paths))
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        pool.terminate()
        pool.join()
def main():
    """Run ``google_search`` on 1000 query URLs using ten worker threads.

    The URLs are the base search URL followed by the numbers 0..999.
    """
    base_url = 'https://www.google.com/?gws_rd=ssl#q='
    urls = []
    for number in xrange(1000):
        urls.append(base_url + str(number))

    workers = ThreadPool(10)
    workers.map(google_search, urls)
    workers.close()
    workers.join()
def poll_all(self, recipient_infos):
    """Poll every recipient concurrently and collect the answers.

    :param recipient_infos: sequence of entries of the form
        ``(player, type, body)``.
    :return: dict mapping each player to the value returned by
        ``self.poll(player, type, body)``, or to ``None`` when the poll
        raised or did not answer within ``self.timeout`` (the error is passed
        to ``self.log_error``).  An empty input yields an empty dict.
    """
    results = dict()
    if not recipient_infos:
        # ThreadPool(processes=0) raises ValueError; nothing to poll anyway.
        return results

    # One thread per recipient so that all polls are in flight at once.
    pool = ThreadPool(processes=len(recipient_infos))
    try:
        pending = dict()
        for info in recipient_infos:
            receiver, rq_type, body = info[0], info[1], info[2]
            pending[receiver] = pool.apply_async(
                self.poll, (receiver, rq_type, body,))

        # Collecting sequentially does not serialize the polls: they already
        # run in parallel, get() merely waits for each one in turn.
        for info in recipient_infos:
            receiver = info[0]
            try:
                results[receiver] = pending[receiver].get(
                    timeout=self.timeout)
            except Exception as e:
                self.log_error(e)
                results[receiver] = None
    finally:
        # Clean up those threads
        pool.close()
        pool.join()

    return results
try:  # Python 3.3+: the ABCs live in collections.abc
    from collections.abc import Iterable as _Iterable
except ImportError:  # Python 2
    from collections import Iterable as _Iterable


class parallel_map(_Iterable):
    """Iterable that applies *function* to zipped *iterables* on a thread pool.

    Results are yielded in completion order (``imap_unordered``).  Exceptions
    raised by *function* do not interrupt the iteration: they are collected
    and, once every result has been consumed and the pool has been shut down,
    the first collected exception is re-raised with its original traceback.
    """

    def __init__(self, pool_size, function, *iterables):
        """Validate the arguments and start the work immediately.

        :raises TypeError: if *pool_size* is not an integer, *function* is
            not callable, or no iterable was given.
        """
        if not isinstance(pool_size, numbers.Integral):
            raise TypeError('pool_size must be an integer, not ' +
                            repr(pool_size))
        elif not callable(function):
            raise TypeError('function must be callable, not ' +
                            repr(function))
        elif not iterables:
            raise TypeError('missing iterable')
        self.pool = ThreadPool(pool_size)
        self.function = function
        self.results = self.pool.imap_unordered(self.map_function,
                                                zip(*iterables))

    def map_function(self, args):
        """Call the function; return ``(True, value)`` or ``(False, exc_info)``."""
        try:
            value = self.function(*args)
        except Exception:
            return False, sys.exc_info()
        return True, value

    def __iter__(self):
        errors = []
        for success, value in self.results:
            if success:
                yield value
            else:
                errors.append(value)
        self.pool.close()
        self.pool.join()
        for error in errors:
            if sys.version_info[0] >= 3:
                raise error[1].with_traceback(error[2])
            # The three-argument raise is a syntax error on Python 3, so it is
            # kept inside exec() and only reached on Python 2.
            exec('raise error[1], None, error[2]')
def _send_some_brokers(self, requests, ignore_errors=True):
    """
    Sends a request to one or more brokers in parallel and returns the
    responses keyed by the broker they came from.

    Args:
        requests (int -> BaseRequest): A dictionary whose keys are integer
            broker IDs and whose values are the request objects to send.
        ignore_errors (bool): When True, a broker whose send raised
            ConnectionError is represented by None; when False the
            ConnectionError propagates.

    Returns:
        dict (int -> BaseResponse): A map of broker IDs to whatever
            ``self._send_to_broker`` returned for them (None for brokers that
            failed while ``ignore_errors`` is set).
    """
    pool = ThreadPool(processes=self.configuration.broker_threads)
    in_flight = {
        broker_id: pool.apply_async(self._send_to_broker,
                                    (broker_id, requests[broker_id]))
        for broker_id in requests
    }
    pool.close()
    pool.join()

    responses = {}
    for broker_id in in_flight:
        handle = in_flight[broker_id]
        try:
            reply = handle.get()
        except ConnectionError:
            if not ignore_errors:
                raise
            # Individual broker failures are OK; they show up as None.
            reply = None
        responses[broker_id] = reply
    return responses
def main():
    """Run ``worker`` over every entry of ``tales`` on a thread pool.

    The pool size is read from ``tcfg['Workers']['pool_size']`` and defaults
    to 10 when that key is absent.
    """
    # Run the Tales
    # A second, unconfigured ThreadPool() used to overwrite this one, which
    # leaked the first pool and ignored the configured size.
    pool = ThreadPool(processes=int(tcfg['Workers'].get('pool_size', 10)))
    try:
        pool.map(worker, tales)
    finally:
        pool.close()
        pool.join()
def run(self):
    """Run the experiment once per agent, each on its own pool thread.

    Schedules ``self.run_experiement(self.experiment, idx)`` for every
    ``idx`` in ``range(self.num_agents)`` and waits for all of them.
    """
    workers = ThreadPool(self.num_agents)
    for agent_index in range(self.num_agents):
        call_args = (self.experiment, agent_index)
        workers.apply_async(self.run_experiement, args=call_args)
    # No more submissions; block until every agent has finished.
    workers.close()
    workers.join()
def main():
    """Check every proxy from ``proxylist.txt`` and save the working ones.

    Each line (with a leading ``http://`` removed and whitespace stripped) is
    passed to ``check_proxy`` on a thread pool together with its index and
    the total line count.  Truthy results are written to ``proxy.txt``, one
    per line; if there are none, a message is printed and nothing is written.
    """
    socket.setdefaulttimeout(10)

    with open('proxylist.txt') as source:
        proxy_list = source.readlines()
    total = len(proxy_list)

    pool = ThreadPool(multiprocessing.cpu_count() * 2 + 1)
    pending = []
    for index, line in enumerate(proxy_list):
        stripped = line[7:] if line.startswith('http://') else line
        curr_proxy = stripped.strip()
        pending.append(pool.apply_async(
            check_proxy, args=(curr_proxy, index, total)))
    pool.close()
    pool.join()

    good_proxys = [found
                   for found in (handle.get() for handle in pending)
                   if found]

    if not good_proxys:
        print('No proxy are working!')
        return

    with open('proxy.txt', 'w') as sink:
        sink.writelines(found + '\n' for found in good_proxys)
def worker(self, db, lista):
    '''
    Multithreaded online plugin search.

    Runs ``onlinePluginSearch`` on every entry of ``lista`` with five pool
    workers.  Each result is indexed as ``(key, plugins)``; every plugin is
    stored through ``db.updateCache(key, plugin)``.

    Returns a comma-separated string of all plugins found, always followed by
    the three fixed plugins 19506, 10287 and 12634.
    '''
    # Make the Pool of workers
    # WARNING: a fibre connection copes with up to 20 workers without errors,
    # an ADSL line with at most 4.
    processes = 5
    pool = Pool(processes)
    try:
        # Open the urls in their own threads and return the results
        pluglist = pool.map(onlinePluginSearch, lista)
    finally:
        # close the pool and wait for the work to finish
        pool.close()
        pool.join()

    # Parse the result (list of tuples), update the cache and remember every
    # plugin for the returned string.
    found = []
    for item in pluglist:
        for plug in item[1]:
            db.updateCache(item[0], plug)
            found.append(str(plug))

    # Count the plugins directly: counting commas and adding one was off by
    # one, because every plugin was followed by a comma.
    print("Number of available pflugins: %s" % len(found))
    print("Adding to policy plugins: 19506,10287,12634 for credential checks and ping target.")
    # These three plugins are always added to verify that the target is alive.
    return ",".join(found + ["19506", "10287", "12634"])
def _listArtifacts(self, urls, gavs):
    """
    Builds a maven artifact from each GAV and looks for it in the given
    repositories, checking all GAVs in parallel.

    :param urls: repository URLs, tried in order; the first one where
                 ``maven_repo_util.gavExists`` succeeds wins
    :param gavs: List of GAVs
    :returns: Dictionary where index is MavenArtifact object and value is
              the ArtifactSpec built from the repo URL it was found in.
              Artifacts found nowhere are logged as a warning and left out.
    """
    found = {}

    def locate(gav):
        artifact = MavenArtifact.createFromGAV(gav)
        for url in urls:
            if maven_repo_util.gavExists(url, artifact):
                # Critical section?
                found[artifact] = ArtifactSpec(url)
                break
        else:
            logging.warning('Artifact %s not found in any url!', artifact)

    workers = ThreadPool(maven_repo_util.MAX_THREADS)
    for gav in gavs:
        workers.apply_async(locate, [gav])

    # Close the pool and wait for the workers to finish
    workers.close()
    workers.join()

    return found
def run(self, max_number_of_live_tokens=None, group=None):
    """Set up the queues and locks for the filter chain inside a fresh pool.

    The ``group`` argument is replaced by a new ``Pool()``.  The first
    filter's ``is_serial`` / ``is_ordered`` flags select the lock and output
    queue types.  Any exception during setup terminates the pool and is
    swallowed; the pool is always joined before returning.
    """
    group = Pool()
    try:
        stages = []
        in_q = _DummyQueue()
        end_in = Event()
        serial = Lock() if self._filters[0].is_serial else _DummyLock()
        out_q = PriorityQueue() if self._filters[0].is_ordered else Queue()
        for _ in enumerate(self._filters):
            pass
        send_q = Queue()
        recv_q = Queue()
        group.close()
    except:
        group.terminate()
    finally:
        group.join()
def cleanup(self, odps):
    """Drop every tracked object and persist whatever could not be dropped.

    ``obj.drop(odps)`` is attempted for each object in ``self._container`` on
    a thread pool; failures are ignored (best effort).  Successfully dropped
    objects are removed from the container.  If the container ends up empty
    the backing file ``self._file_name`` is deleted (a missing file is fine);
    otherwise ``self.dump()`` is called.
    """
    cleaned = []

    def cleaner_thread(obj):
        try:
            obj.drop(odps)
            cleaned.append(obj)
        except Exception:
            # Best effort: an object that cannot be dropped stays tracked.
            pass

    if self._container:
        # Only spin up worker threads when there is something to clean, and
        # always release them again.
        pool = ThreadPool(CLEANER_THREADS)
        try:
            pool.map(cleaner_thread, self._container)
        finally:
            pool.close()
            pool.join()

    for obj in cleaned:
        if obj in self._container:
            self._container.remove(obj)

    if not self._container:
        try:
            os.unlink(self._file_name)
        except OSError:
            pass
    else:
        self.dump()
def start_pool(self):
    """Map ``self.func`` over ``self.arr`` using a pool of ``self.p`` workers.

    The mapped results are discarded; the call returns once the pool has been
    closed and joined.
    """
    workers = Pool(self.p)
    workers.map(self.func, self.arr)
    # map() is synchronous, so all that is left is releasing the workers.
    workers.close()
    workers.join()
code = emote['regex'] number = emote['images'][0]['emoticon_set'] try: parentPath = './emotes/' + str(number) if not os.path.exists(parentPath): os.makedirs(parentPath) filePath = './emotes/' + str(number) + '/' + str(code) + '.png' if not os.path.exists(filePath): if (printme): print('Downloading: ' + str(code) + ' in ... ' + filePath) urllib.request.urlretrieve(emote['images'][0]['url'], filePath) count += 1 else: if (printme): print('skipped') except Exception as e: print(e) for emote in emotes['emoticons']: pool.apply_async(my_op, (emote, )) pool.close() pool.join() end = time.time() print('Downloaded ' + str(count) + ' new files') print('Running time: ' + str(end - start))
def prepare_connectivity(self, reservation, cloud_provider_model, storage_client, resource_client, network_client, logger, actions, cancellation_context):
    """Prepare the sandbox infrastructure for a reservation.

    Steps, in order: create a resource group named after the reservation id;
    start storage-account/keypair creation on a worker thread; look up the
    management and sandbox vNets by tag; create a network security group and
    its management rules; create the subnet; wait for the storage task.
    Cancellation is checked between steps via ``self.cancellation_service``.

    :param cloudshell.cp.azure.models.reservation_model.ReservationModel reservation:
    :param cloudshell.cp.azure.models.azure_cloud_provider_resource_model.AzureCloudProviderResourceModel cloud_provider_model: cloud provider
    :param storage_client:
    :param resource_client:
    :param network_client:
    :param logging.Logger logger:
    :param actions: list[cloudshell.cp.core.models.RequestActionBase]
    :param cancellation_context: cloudshell.shell.core.driver_context.CancellationContext instance
    :return: the value of ``self._prepare_results(network_action_result, actions)``
    """
    cidr = self._validate_request_and_extract_cidr(actions)
    logger.info("Received CIDR {0} from server".format(cidr))

    reservation_id = reservation.reservation_id
    # The reservation id doubles as resource group name and subnet name.
    group_name = str(reservation_id)
    subnet_name = group_name
    tags = self.tags_service.get_tags(reservation=reservation)
    # Passed to the async storage task below and to _prepare_results at the end.
    network_action_result = PrepareCloudInfraResult()

    # 1. Create a resource group
    logger.info("Creating a resource group: {0} .".format(group_name))
    self.vm_service.create_resource_group(
        resource_management_client=resource_client,
        group_name=group_name,
        region=cloud_provider_model.region,
        tags=tags)

    self.cancellation_service.check_if_cancelled(cancellation_context)

    storage_account_name = self._prepare_storage_account_name(
        reservation_id)

    # 2+3. create storage account and keypairs (async)
    # Runs on a pool thread while the network steps below proceed.
    pool = ThreadPool()
    storage_res = pool.apply_async(
        self._create_storage_and_keypairs,
        (logger, storage_client, storage_account_name, group_name,
         cloud_provider_model, tags, cancellation_context,
         network_action_result))

    logger.info(
        "Retrieving MGMT vNet from resource group {} by tag {}={}".format(
            cloud_provider_model.management_group_name,
            NetworkService.NETWORK_TYPE_TAG_NAME,
            NetworkService.MGMT_NETWORK_TAG_VALUE))

    # Both vNets are looked up in this single listing of the management group.
    virtual_networks = self.network_service.get_virtual_networks(
        network_client=network_client,
        group_name=cloud_provider_model.management_group_name)

    self.cancellation_service.check_if_cancelled(cancellation_context)

    management_vnet = self.network_service.get_virtual_network_by_tag(
        virtual_networks=virtual_networks,
        tag_key=NetworkService.NETWORK_TYPE_TAG_NAME,
        tag_value=NetworkService.MGMT_NETWORK_TAG_VALUE)

    self._validate_management_vnet(management_vnet)

    logger.info(
        "Retrieving sandbox vNet from resource group {} by tag {}={}".
        format(cloud_provider_model.management_group_name,
               NetworkService.NETWORK_TYPE_TAG_NAME,
               NetworkService.SANDBOX_NETWORK_TAG_VALUE))

    sandbox_vnet = self.network_service.get_virtual_network_by_tag(
        virtual_networks=virtual_networks,
        tag_key=NetworkService.NETWORK_TYPE_TAG_NAME,
        tag_value=NetworkService.SANDBOX_NETWORK_TAG_VALUE)

    self._validate_sandbox_vnet(sandbox_vnet)

    # 4. Create the NSG object
    # Note: unlike group_name, this is the raw reservation id (no str()).
    security_group_name = reservation_id
    logger.info("Creating a network security group '{}' .".format(
        security_group_name))
    network_security_group = self.security_group_service.create_network_security_group(
        network_client=network_client,
        group_name=group_name,
        security_group_name=security_group_name,
        region=cloud_provider_model.region,
        tags=tags)

    self.cancellation_service.check_if_cancelled(cancellation_context)

    logger.info("Creating NSG management rules...")
    # 5. Set rules on NSG to create a sandbox
    self._create_management_rules(
        group_name=group_name,
        management_vnet=management_vnet,
        network_client=network_client,
        sandbox_vnet_cidr=cidr,
        security_group_name=security_group_name,
        additional_mgmt_networks=cloud_provider_model.
        additional_mgmt_networks,
        logger=logger)

    self.cancellation_service.check_if_cancelled(cancellation_context)

    # 6. Create a subnet with NSG
    self._create_subnet(cidr=cidr,
                        cloud_provider_model=cloud_provider_model,
                        logger=logger,
                        network_client=network_client,
                        resource_client=resource_client,
                        network_security_group=network_security_group,
                        sandbox_vnet=sandbox_vnet,
                        subnet_name=subnet_name)

    self.cancellation_service.check_if_cancelled(cancellation_context)

    # wait for all async operations
    # NOTE(review): the pool is only closed on this success path; an exception
    # raised by any step above leaves it open -- confirm whether that matters.
    pool.close()
    pool.join()
    # NOTE(review): join() above already blocks until the storage task is done,
    # so the 900 s timeout below can presumably never expire; get() mainly
    # re-raises an exception from the task -- confirm the intended ordering.
    storage_res.get(
        timeout=900
    )  # will wait for 15 min and raise exception if storage account creation failed

    return self._prepare_results(network_action_result, actions)
class ApiClient(object): """ Generic API client for Swagger client library builds. Swagger generic API client. This client handles the client- server communication, and is invariant across implementations. Specifics of the methods and models for each application are generated from the Swagger templates. NOTE: This class is auto generated by the swagger code generator program. Ref: https://github.com/swagger-api/swagger-codegen Do not edit the class manually. :param host: The base path for the server to call. :param header_name: a header to pass when making calls to the API. :param header_value: a header value to pass when making calls to the API. """ PRIMITIVE_TYPES = (float, bool, bytes, text_type) + integer_types NATIVE_TYPES_MAPPING = { 'int': int, 'long': int if PY3 else long, 'float': float, 'str': str, 'bool': bool, 'date': date, 'datetime': datetime, 'object': object, } def __init__(self, configuration=None, header_name=None, header_value=None, cookie=None): if configuration is None: configuration = Configuration() self.configuration = configuration self.pool = ThreadPool() self.rest_client = RESTClientObject(configuration) self.default_headers = {} if header_name is not None: self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. self.user_agent = 'Swagger-Codegen/1.0.0/python' def __del__(self): self.pool.close() self.pool.join() @property def user_agent(self): """ Gets user agent. """ return self.default_headers['User-Agent'] @user_agent.setter def user_agent(self, value): """ Sets user agent. 
""" self.default_headers['User-Agent'] = value def set_default_header(self, header_name, header_value): self.default_headers[header_name] = header_value def __call_api(self, resource_path, method, path_params=None, query_params=None, header_params=None, body=None, post_params=None, files=None, response_type=None, auth_settings=None, _return_http_data_only=None, collection_formats=None, _preload_content=True, _request_timeout=None): config = self.configuration # header parameters header_params = header_params or {} header_params.update(self.default_headers) if self.cookie: header_params['Cookie'] = self.cookie if header_params: header_params = self.sanitize_for_serialization(header_params) header_params = dict( self.parameters_to_tuples(header_params, collection_formats)) # path parameters if path_params: path_params = self.sanitize_for_serialization(path_params) path_params = self.parameters_to_tuples(path_params, collection_formats) for k, v in path_params: # specified safe chars, encode everything resource_path = resource_path.replace( '{%s}' % k, quote(str(v), safe=config.safe_chars_for_path_param)) # query parameters if query_params: query_params = self.sanitize_for_serialization(query_params) query_params = self.parameters_to_tuples(query_params, collection_formats) # post parameters if post_params or files: post_params = self.prepare_post_parameters(post_params, files) post_params = self.sanitize_for_serialization(post_params) post_params = self.parameters_to_tuples(post_params, collection_formats) # auth setting self.update_params_for_auth(header_params, query_params, auth_settings) # body if body: body = self.sanitize_for_serialization(body) # request url url = self.configuration.host + resource_path # perform request and return response response_data = self.request(method, url, query_params=query_params, headers=header_params, post_params=post_params, body=body, _preload_content=_preload_content, _request_timeout=_request_timeout) self.last_response = 
response_data return_data = response_data if _preload_content: # deserialize response data if response_type: return_data = self.deserialize(response_data, response_type) else: return_data = None if _return_http_data_only: return (return_data) else: return (return_data, response_data.status, response_data.getheaders()) def sanitize_for_serialization(self, obj): """ Builds a JSON POST object. If obj is None, return None. If obj is str, int, long, float, bool, return directly. If obj is datetime.datetime, datetime.date convert to string in iso8601 format. If obj is list, sanitize each element in the list. If obj is dict, return the dict. If obj is swagger model, return the properties dict. :param obj: The data to serialize. :return: The serialized form of data. """ if obj is None: return None elif isinstance(obj, self.PRIMITIVE_TYPES): return obj elif isinstance(obj, list): return [ self.sanitize_for_serialization(sub_obj) for sub_obj in obj ] elif isinstance(obj, tuple): return tuple( self.sanitize_for_serialization(sub_obj) for sub_obj in obj) elif isinstance(obj, (datetime, date)): return obj.isoformat() if isinstance(obj, dict): obj_dict = obj else: # Convert model obj to dict except # attributes `swagger_types`, `attribute_map` # and attributes which value is not None. # Convert attribute name to json key in # model definition for request. obj_dict = { obj.attribute_map[attr]: getattr(obj, attr) for attr, _ in iteritems(obj.swagger_types) if getattr(obj, attr) is not None } return { key: self.sanitize_for_serialization(val) for key, val in iteritems(obj_dict) } def deserialize(self, response, response_type): """ Deserializes response into an object. :param response: RESTResponse object to be deserialized. :param response_type: class literal for deserialized object, or string of class name. :return: deserialized object. 
""" # handle file downloading # save response body into a tmp file and return the instance if response_type == "file": return self.__deserialize_file(response) # fetch data from response object try: data = json.loads(response.data) except ValueError: data = response.data return self.__deserialize(data, response_type) def __deserialize(self, data, klass): """ Deserializes dict, list, str into an object. :param data: dict, list or str. :param klass: class literal, or string of class name. :return: object. """ if data is None: return None if type(klass) == str: if klass.startswith('list['): sub_kls = re.match('list\[(.*)\]', klass).group(1) return [ self.__deserialize(sub_data, sub_kls) for sub_data in data ] if klass.startswith('dict('): sub_kls = re.match('dict\(([^,]*), (.*)\)', klass).group(2) return { k: self.__deserialize(v, sub_kls) for k, v in iteritems(data) } # convert str to class if klass in self.NATIVE_TYPES_MAPPING: klass = self.NATIVE_TYPES_MAPPING[klass] else: klass = getattr(models, klass) if klass in self.PRIMITIVE_TYPES: return self.__deserialize_primitive(data, klass) elif klass == object: return self.__deserialize_object(data) elif klass == date: return self.__deserialize_date(data) elif klass == datetime: return self.__deserialize_datatime(data) else: return self.__deserialize_model(data, klass) def call_api(self, resource_path, method, path_params=None, query_params=None, header_params=None, body=None, post_params=None, files=None, response_type=None, auth_settings=None, async=None, _return_http_data_only=None, collection_formats=None, _preload_content=True, _request_timeout=None):
def __get_latest_routemanagers(self) -> Optional[Dict[str, dict]]:
    """Build one route manager per configured area.

    For every area returned by the data manager this resolves the included
    and (optional) excluded geofences, creates a route manager through
    ``RouteManagerFactory`` and, unless the mode is ``iv_mitm``/``idle`` or
    the calc type is ``routefree``, fetches coordinates and schedules the
    route calculation on a small thread pool. All scheduled calculations
    are awaited before returning.

    :return: dict mapping area id to a dict with the keys ``mode``,
        ``geofence_included``, ``geofence_excluded``, ``routecalc``,
        ``name`` and ``routemanager``. Empty when running in config mode.
    :raises RuntimeError: if an area has no ``geofence_included`` or a
        referenced geofence cannot be loaded.
    """
    global mode_mapping
    areas: Optional[Dict[str, dict]] = {}

    if self.__configmode:
        return areas

    raw_areas = self.__data_manager.get_root_resource('area')

    areas_procs = {}
    # The context manager terminates the pool if anything below raises, so
    # worker threads are not leaked on a configuration error.
    with ThreadPool(processes=4) as thread_pool:
        for area_id, area_true in raw_areas.items():
            area = area_true.get_resource()
            if area["geofence_included"] is None:
                raise RuntimeError("Cannot work without geofence_included")

            try:
                geofence_included = self.__data_manager.get_resource(
                    'geofence', identifier=area["geofence_included"])
            except Exception as err:
                # Report the identifier that was requested; the local
                # ``geofence_included`` is not (re)bound when the lookup fails.
                raise RuntimeError(
                    "geofence_included for area '{}' is specified but does not exist ('{}')."
                    .format(area["name"], area["geofence_included"])) from err

            geofence_excluded_raw_path = area.get("geofence_excluded", None)
            try:
                if geofence_excluded_raw_path is not None:
                    geofence_excluded = self.__data_manager.get_resource(
                        'geofence', identifier=geofence_excluded_raw_path)
                else:
                    geofence_excluded = None
            except Exception as err:
                raise RuntimeError(
                    "geofence_excluded for area '{}' is specified but file does not exist ('{}')."
                    .format(area["name"], geofence_excluded_raw_path)) from err

            area_dict = {
                "mode": area_true.area_type,
                "geofence_included": geofence_included,
                "geofence_excluded": geofence_excluded,
                "routecalc": area["routecalc"],
                "name": area['name']
            }
            # also build a routemanager for each area...

            # grab coords
            # first check if init is false, if so, grab the coords from DB
            geofence_helper = GeofenceHelper(geofence_included, geofence_excluded)
            mode = area_true.area_type
            # build routemanagers

            # map iv list to ids
            if area.get('settings', None) is not None and 'mon_ids_iv' in area['settings']:
                # replace list name
                area['settings']['mon_ids_iv_raw'] = \
                    self.get_monlist(area['settings'].get('mon_ids_iv', None),
                                     area.get("name", "unknown"))
            route_resource = self.__data_manager.get_resource(
                'routecalc', identifier=area["routecalc"])

            calc_type: str = area.get("route_calc_algorithm", "route")
            route_manager = RouteManagerFactory.get_routemanager(
                self.__db_wrapper,
                self.__data_manager,
                area_id,
                None,
                mode_mapping.get(mode, {}).get("range", 0),
                mode_mapping.get(mode, {}).get("max_count", 99999999),
                geofence_included,
                path_to_exclude_geofence=geofence_excluded,
                mode=mode,
                settings=area.get("settings", None),
                init=area.get("init", False),
                name=area.get("name", "unknown"),
                level=area.get("level", False),
                coords_spawns_known=area.get("coords_spawns_known", False),
                routefile=route_resource,
                calctype=calc_type,
                joinqueue=self.join_routes_queue,
                s2_level=mode_mapping.get(mode, {}).get("s2_cell_level", 30),
                include_event_id=area.get("settings", {}).get("include_event_id", None))
            logger.info("Initializing area {}", area["name"])

            if mode not in ("iv_mitm", "idle") and calc_type != "routefree":
                coords = self.__fetch_coords(
                    mode,
                    geofence_helper,
                    coords_spawns_known=area.get("coords_spawns_known", False),
                    init=area.get("init", False),
                    range_init=mode_mapping.get(mode, {}).get("range_init", 630),
                    including_stops=area.get("including_stops", False),
                    include_event_id=area.get("settings", {}).get("include_event_id", None))

                route_manager.add_coords_list(coords)
                max_radius = mode_mapping[mode]["range"]
                max_count_in_radius = mode_mapping[mode]["max_count"]
                if not area.get("init", False):
                    proc = thread_pool.apply_async(
                        route_manager.initial_calculation,
                        args=(max_radius, max_count_in_radius, 0, False))
                    areas_procs[area_id] = proc
                else:
                    logger.info("Init mode enabled. Going row-based for {}",
                                area.get("name", "unknown"))
                    # we are in init, let's write the init route to file to
                    # make it visible in madmin
                    calc_coords = []
                    if area["routecalc"] is not None:
                        for loc in coords:
                            calc_coord = '%s,%s' % (str(loc.lat), str(loc.lng))
                            calc_coords.append(calc_coord)
                        route_resource['routefile'] = calc_coords
                        route_resource.save()
                    # gotta feed the route to routemanager... TODO: without recalc...
                    proc = thread_pool.apply_async(route_manager.recalc_route,
                                                   args=(1, 99999999, 0, False))
                    areas_procs[area_id] = proc

            area_dict["routemanager"] = route_manager
            areas[area_id] = area_dict

        # Block until every scheduled calculation is done; ``get`` re-raises
        # any exception thrown inside a worker.
        for proc in areas_procs.values():
            proc.get()

        thread_pool.close()
        thread_pool.join()
    return areas
class ApiClient(object):
    """Generic API client for Swagger client library builds.

    Swagger generic API client. This client handles the client-
    server communication, and is invariant across implementations. Specifics of
    the methods and models for each application are generated from the Swagger
    templates.

    NOTE: This class is auto generated by the swagger code generator program.
    Ref: https://github.com/swagger-api/swagger-codegen
    Do not edit the class manually.

    :param configuration: .Configuration object for this client
    :param header_name: a header to pass when making calls to the API.
    :param header_value: a header value to pass when making calls to
        the API.
    :param cookie: a cookie to include in the header when making calls
        to the API
    """

    PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types
    NATIVE_TYPES_MAPPING = {
        'int': int,
        'long': int if six.PY3 else long,  # noqa: F821
        'float': float,
        'str': str,
        'bool': bool,
        'date': datetime.date,
        'datetime': datetime.datetime,
        'object': object,
    }

    def __init__(self, configuration=None, header_name=None, header_value=None,
                 cookie=None):
        """Create the client, its thread pool and its REST transport.

        A default ``Configuration()`` is used when none is supplied.
        """
        if configuration is None:
            configuration = Configuration()
        self.configuration = configuration

        self.pool = ThreadPool()
        self.rest_client = rest.RESTClientObject(configuration)
        self.default_headers = {}
        if header_name is not None:
            self.default_headers[header_name] = header_value
        self.cookie = cookie
        # Set default User-Agent.
        self.user_agent = 'Swagger-Codegen/1.1.0/python'

    def __del__(self):
        """Shut down the thread pool when the client is garbage collected."""
        # ``pool`` is missing if ``__init__`` raised before assigning it;
        # guard so finalization does not fail with AttributeError.
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.close()
            pool.join()

    @property
    def user_agent(self):
        """User agent for this API client"""
        return self.default_headers['User-Agent']

    @user_agent.setter
    def user_agent(self, value):
        self.default_headers['User-Agent'] = value

    def set_default_header(self, header_name, header_value):
        """Set a header that is sent with every request."""
        self.default_headers[header_name] = header_value

    def __call_api(
            self, resource_path, method, path_params=None,
            query_params=None, header_params=None, body=None, post_params=None,
            files=None, response_type=None, auth_settings=None,
            _return_http_data_only=None, collection_formats=None,
            _preload_content=True, _request_timeout=None):
        """Prepare parameters, perform the request and decode the response.

        See :meth:`call_api` for the meaning of the parameters.

        :return: the deserialized data when ``_return_http_data_only`` is
            truthy, otherwise a ``(data, status, headers)`` tuple.
        """
        config = self.configuration

        # header parameters
        header_params = header_params or {}
        header_params.update(self.default_headers)
        if self.cookie:
            header_params['Cookie'] = self.cookie
        if header_params:
            header_params = self.sanitize_for_serialization(header_params)
            header_params = dict(self.parameters_to_tuples(header_params,
                                                           collection_formats))

        # path parameters
        if path_params:
            path_params = self.sanitize_for_serialization(path_params)
            path_params = self.parameters_to_tuples(path_params,
                                                    collection_formats)
            for k, v in path_params:
                # specified safe chars, encode everything
                resource_path = resource_path.replace(
                    '{%s}' % k,
                    quote(str(v), safe=config.safe_chars_for_path_param)
                )

        # query parameters
        if query_params:
            query_params = self.sanitize_for_serialization(query_params)
            query_params = self.parameters_to_tuples(query_params,
                                                     collection_formats)

        # post parameters
        if post_params or files:
            post_params = self.prepare_post_parameters(post_params, files)
            post_params = self.sanitize_for_serialization(post_params)
            post_params = self.parameters_to_tuples(post_params,
                                                    collection_formats)

        # auth setting
        self.update_params_for_auth(header_params, query_params, auth_settings)

        # body
        if body:
            body = self.sanitize_for_serialization(body)

        # request url
        url = self.configuration.host + resource_path

        # perform request and return response
        response_data = self.request(
            method, url, query_params=query_params, headers=header_params,
            post_params=post_params, body=body,
            _preload_content=_preload_content,
            _request_timeout=_request_timeout)

        self.last_response = response_data

        return_data = response_data
        if _preload_content:
            # deserialize response data
            if response_type:
                return_data = self.deserialize(response_data, response_type)
            else:
                return_data = None

        if _return_http_data_only:
            return (return_data)
        else:
            return (return_data, response_data.status,
                    response_data.getheaders())

    def sanitize_for_serialization(self, obj):
        """Builds a JSON POST object.

        If obj is None, return None.
        If obj is str, int, long, float, bool, return directly.
        If obj is datetime.datetime, datetime.date
            convert to string in iso8601 format.
        If obj is list, sanitize each element in the list.
        If obj is dict, return the dict.
        If obj is swagger model, return the properties dict.

        :param obj: The data to serialize.
        :return: The serialized form of data.
        """
        if obj is None:
            return None
        elif isinstance(obj, self.PRIMITIVE_TYPES):
            return obj
        elif isinstance(obj, list):
            return [self.sanitize_for_serialization(sub_obj)
                    for sub_obj in obj]
        elif isinstance(obj, tuple):
            return tuple(self.sanitize_for_serialization(sub_obj)
                         for sub_obj in obj)
        elif isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()

        if isinstance(obj, dict):
            obj_dict = obj
        else:
            # Convert model obj to dict except
            # attributes `swagger_types`, `attribute_map`
            # and attributes which value is not None.
            # Convert attribute name to json key in
            # model definition for request.
            obj_dict = {obj.attribute_map[attr]: getattr(obj, attr)
                        for attr, _ in six.iteritems(obj.swagger_types)
                        if getattr(obj, attr) is not None}

        return {key: self.sanitize_for_serialization(val)
                for key, val in six.iteritems(obj_dict)}

    def deserialize(self, response, response_type):
        """Deserializes response into an object.

        :param response: RESTResponse object to be deserialized.
        :param response_type: class literal for
            deserialized object, or string of class name.

        :return: deserialized object.
        """
        # handle file downloading
        # save response body into a tmp file and return the instance
        if response_type == "file":
            return self.__deserialize_file(response)

        # fetch data from response object
        try:
            data = json.loads(response.data)
        except ValueError:
            # not JSON: hand the raw payload to the type-specific decoder
            data = response.data

        return self.__deserialize(data, response_type)

    def __deserialize(self, data, klass):
        """Deserializes dict, list, str into an object.

        :param data: dict, list or str.
        :param klass: class literal, or string of class name.

        :return: object.
        """
        if data is None:
            return None

        if isinstance(klass, str):
            if klass.startswith('list['):
                sub_kls = re.match(r'list\[(.*)\]', klass).group(1)
                return [self.__deserialize(sub_data, sub_kls)
                        for sub_data in data]

            if klass.startswith('dict('):
                sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2)
                return {k: self.__deserialize(v, sub_kls)
                        for k, v in six.iteritems(data)}

            # convert str to class
            if klass in self.NATIVE_TYPES_MAPPING:
                klass = self.NATIVE_TYPES_MAPPING[klass]
            else:
                klass = getattr(flagr.models, klass)

        if klass in self.PRIMITIVE_TYPES:
            return self.__deserialize_primitive(data, klass)
        elif klass == object:
            return self.__deserialize_object(data)
        elif klass == datetime.date:
            return self.__deserialize_date(data)
        elif klass == datetime.datetime:
            return self.__deserialize_datatime(data)
        else:
            return self.__deserialize_model(data, klass)

    def call_api(self, resource_path, method,
                 path_params=None, query_params=None, header_params=None,
                 body=None, post_params=None, files=None,
                 response_type=None, auth_settings=None, is_async=None,
                 _return_http_data_only=None, collection_formats=None,
                 _preload_content=True, _request_timeout=None):
        """Makes the HTTP request (synchronous) and returns deserialized data.

        To make an async request, set the is_async parameter.

        :param resource_path: Path to method endpoint.
        :param method: Method to call.
        :param path_params: Path parameters in the url.
        :param query_params: Query parameters in the url.
        :param header_params: Header parameters to be
            placed in the request header.
        :param body: Request body.
        :param post_params dict: Request post form parameters,
            for `application/x-www-form-urlencoded`, `multipart/form-data`.
        :param auth_settings list: Auth Settings names for the request.
        :param response: Response data type.
        :param files dict: key -> filename, value -> filepath,
            for `multipart/form-data`.
        :param is_async bool: execute request asynchronously
        :param _return_http_data_only: response data without head status code
                                       and headers
        :param collection_formats: dict of collection formats for path, query,
            header, and post parameters.
        :param _preload_content: if False, the urllib3.HTTPResponse object will
                                 be returned without reading/decoding response
                                 data. Default is True.
        :param _request_timeout: timeout setting for this request. If one
                                 number provided, it will be total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts.
        :return:
            If is_async parameter is True,
            the request will be called asynchronously.
            The method will return the request thread.
            If parameter is_async is False or missing,
            then the method will return the response directly.
        """
        if not is_async:
            return self.__call_api(resource_path, method,
                                   path_params, query_params, header_params,
                                   body, post_params, files,
                                   response_type, auth_settings,
                                   _return_http_data_only, collection_formats,
                                   _preload_content, _request_timeout)
        else:
            thread = self.pool.apply_async(self.__call_api, (resource_path,
                                           method, path_params, query_params,
                                           header_params, body,
                                           post_params, files,
                                           response_type, auth_settings,
                                           _return_http_data_only,
                                           collection_formats,
                                           _preload_content, _request_timeout))
        return thread

    def request(self, method, url, query_params=None, headers=None,
                post_params=None, body=None, _preload_content=True,
                _request_timeout=None):
        """Makes the HTTP request using RESTClient."""
        if method == "GET":
            return self.rest_client.GET(url,
                                        query_params=query_params,
                                        _preload_content=_preload_content,
                                        _request_timeout=_request_timeout,
                                        headers=headers)
        elif method == "HEAD":
            return self.rest_client.HEAD(url,
                                         query_params=query_params,
                                         _preload_content=_preload_content,
                                         _request_timeout=_request_timeout,
                                         headers=headers)
        elif method == "OPTIONS":
            return self.rest_client.OPTIONS(url,
                                            query_params=query_params,
                                            headers=headers,
                                            post_params=post_params,
                                            _preload_content=_preload_content,
                                            _request_timeout=_request_timeout,
                                            body=body)
        elif method == "POST":
            return self.rest_client.POST(url,
                                         query_params=query_params,
                                         headers=headers,
                                         post_params=post_params,
                                         _preload_content=_preload_content,
                                         _request_timeout=_request_timeout,
                                         body=body)
        elif method == "PUT":
            return self.rest_client.PUT(url,
                                        query_params=query_params,
                                        headers=headers,
                                        post_params=post_params,
                                        _preload_content=_preload_content,
                                        _request_timeout=_request_timeout,
                                        body=body)
        elif method == "PATCH":
            return self.rest_client.PATCH(url,
                                          query_params=query_params,
                                          headers=headers,
                                          post_params=post_params,
                                          _preload_content=_preload_content,
                                          _request_timeout=_request_timeout,
                                          body=body)
        elif method == "DELETE":
            return self.rest_client.DELETE(url,
                                           query_params=query_params,
                                           headers=headers,
                                           _preload_content=_preload_content,
                                           _request_timeout=_request_timeout,
                                           body=body)
        else:
            raise ValueError(
                "http method must be `GET`, `HEAD`, `OPTIONS`,"
                " `POST`, `PATCH`, `PUT` or `DELETE`."
            )

    def parameters_to_tuples(self, params, collection_formats):
        """Get parameters as list of tuples, formatting collections.

        :param params: Parameters as dict or list of two-tuples
        :param dict collection_formats: Parameter collection formats
        :return: Parameters as list of tuples, collections formatted
        """
        new_params = []
        if collection_formats is None:
            collection_formats = {}
        for k, v in six.iteritems(params) if isinstance(params, dict) else params:  # noqa: E501
            if k in collection_formats:
                collection_format = collection_formats[k]
                if collection_format == 'multi':
                    new_params.extend((k, value) for value in v)
                else:
                    if collection_format == 'ssv':
                        delimiter = ' '
                    elif collection_format == 'tsv':
                        delimiter = '\t'
                    elif collection_format == 'pipes':
                        delimiter = '|'
                    else:  # csv is the default
                        delimiter = ','
                    new_params.append(
                        (k, delimiter.join(str(value) for value in v)))
            else:
                new_params.append((k, v))
        return new_params

    def prepare_post_parameters(self, post_params=None, files=None):
        """Builds form parameters.

        :param post_params: Normal form parameters.
        :param files: File parameters.
        :return: Form parameters with files.
        """
        params = []

        if post_params:
            params = post_params

        if files:
            for k, v in six.iteritems(files):
                if not v:
                    continue
                file_names = v if type(v) is list else [v]
                for n in file_names:
                    with open(n, 'rb') as f:
                        filename = os.path.basename(f.name)
                        filedata = f.read()
                        mimetype = (mimetypes.guess_type(filename)[0] or
                                    'application/octet-stream')
                        params.append(
                            tuple([k, tuple([filename, filedata, mimetype])]))

        return params

    def select_header_accept(self, accepts):
        """Returns `Accept` based on an array of accepts provided.

        :param accepts: List of headers.
        :return: Accept (e.g. application/json).
        """
        if not accepts:
            return

        accepts = [x.lower() for x in accepts]

        if 'application/json' in accepts:
            return 'application/json'
        else:
            return ', '.join(accepts)

    def select_header_content_type(self, content_types):
        """Returns `Content-Type` based on an array of content_types provided.

        :param content_types: List of content-types.
        :return: Content-Type (e.g. application/json).
        """
        if not content_types:
            return 'application/json'

        content_types = [x.lower() for x in content_types]

        if 'application/json' in content_types or '*/*' in content_types:
            return 'application/json'
        else:
            return content_types[0]

    def update_params_for_auth(self, headers, querys, auth_settings):
        """Updates header and query params based on authentication setting.

        :param headers: Header parameters dict to be updated.
        :param querys: Query parameters tuple list to be updated.
        :param auth_settings: Authentication setting identifiers list.
        """
        if not auth_settings:
            return

        for auth in auth_settings:
            auth_setting = self.configuration.auth_settings().get(auth)
            if auth_setting:
                if not auth_setting['value']:
                    continue
                elif auth_setting['in'] == 'header':
                    headers[auth_setting['key']] = auth_setting['value']
                elif auth_setting['in'] == 'query':
                    querys.append((auth_setting['key'], auth_setting['value']))
                else:
                    raise ValueError(
                        'Authentication token must be in `query` or `header`'
                    )

    def __deserialize_file(self, response):
        """Deserializes body to file

        Saves response body into a file in a temporary folder,
        using the filename from the `Content-Disposition` header if provided.

        :param response:  RESTResponse.
        :return: file path.
        """
        # mkstemp is only used to obtain a unique path inside the configured
        # temp folder; the placeholder file itself is removed again.
        fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path)
        os.close(fd)
        os.remove(path)

        content_disposition = response.getheader("Content-Disposition")
        if content_disposition:
            match = re.search(r'filename=[\'"]?([^\'"\s]+)[\'"]?',
                              content_disposition)
            # The header comes from the server: keep only the final path
            # component so the file cannot be written outside the temp
            # folder, and fall back to the generated name when the header
            # carries no usable filename.
            filename = os.path.basename(match.group(1)) if match else ''
            if filename and filename not in ('.', '..'):
                path = os.path.join(os.path.dirname(path), filename)

        with open(path, "wb") as f:
            f.write(response.data)

        return path

    def __deserialize_primitive(self, data, klass):
        """Deserializes string to primitive type.

        :param data: str.
        :param klass: class literal.

        :return: int, long, float, str, bool.
        """
        try:
            return klass(data)
        except UnicodeEncodeError:
            return six.text_type(data)
        except TypeError:
            return data

    def __deserialize_object(self, value):
        """Return a original value.

        :return: object.
        """
        return value

    def __deserialize_date(self, string):
        """Deserializes string to date.

        :param string: str.
        :return: date.
        """
        try:
            from dateutil.parser import parse
            return parse(string).date()
        except ImportError:
            # dateutil is optional; without it the raw string is returned
            return string
        except ValueError:
            raise rest.ApiException(
                status=0,
                reason="Failed to parse `{0}` as date object".format(string)
            )

    def __deserialize_datatime(self, string):
        """Deserializes string to datetime.

        The string should be in iso8601 datetime format.

        :param string: str.
        :return: datetime.
        """
        try:
            from dateutil.parser import parse
            return parse(string)
        except ImportError:
            # dateutil is optional; without it the raw string is returned
            return string
        except ValueError:
            raise rest.ApiException(
                status=0,
                reason=(
                    "Failed to parse `{0}` as datetime object"
                    .format(string)
                )
            )

    def __deserialize_model(self, data, klass):
        """Deserializes list or dict to model.

        :param data: dict, list.
        :param klass: class literal.
        :return: model object.
        """
        if (not klass.swagger_types and
                not hasattr(klass, 'get_real_child_model')):
            return data

        kwargs = {}
        if klass.swagger_types is not None:
            for attr, attr_type in six.iteritems(klass.swagger_types):
                if (data is not None and
                        klass.attribute_map[attr] in data and
                        isinstance(data, (list, dict))):
                    value = data[klass.attribute_map[attr]]
                    kwargs[attr] = self.__deserialize(value, attr_type)

        instance = klass(**kwargs)

        # Polymorphic models name the concrete subclass to build instead.
        if hasattr(instance, 'get_real_child_model'):
            klass_name = instance.get_real_child_model(data)
            if klass_name:
                instance = self.__deserialize(data, klass_name)
        return instance
def case_6():
    """Submit 100 ``call_url`` tasks to a CPU-sized thread pool and wait.

    The results of the individual calls are not collected.
    """
    workers = ThreadPool(cpu_count())
    pending = 100
    while pending > 0:
        workers.apply_async(call_url)
        pending -= 1
    # no more submissions; block until every queued task has run
    workers.close()
    workers.join()
def map(self, func, iterdata, extra_env=None, extra_meta=None,
        invoke_pool_threads=64, data_all_as_one=True,
        use_cached_runtime=True, overwrite_invoke_args=None,
        exclude_modules=None):
    """
    Serialize ``func`` and every item of ``iterdata``, upload them and invoke
    one call per item through a thread pool.

    # FIXME work with an actual iterable instead of just a list

    data_all_as_one : upload the data as a single object; fewer
    tcp transactions (good) but potentially higher latency for workers (bad)

    use_cached_runtime : if runtime has been cached, use that. When set
    to False, redownloads runtime.

    exclude_modules : iterable of strings; any collected module path that
    contains one of them is not uploaded. ``None`` means exclude nothing.

    Returns the list of objects returned by ``invoke_with_keys`` (one per
    item, in input order), or ``[]`` when ``iterdata`` is empty.
    """
    data = list(iterdata)
    if not data:
        return []

    # None instead of a shared mutable default list
    if exclude_modules is None:
        exclude_modules = ()

    host_job_meta = {}

    callset_id = wrenutil.create_callset_id()

    ### pickle func and all data (to capture module dependencies
    func_and_data_ser, mod_paths = self.serializer([func] + data)
    logger.debug("map {} module paths: {}".format(callset_id, mod_paths))

    func_str = func_and_data_ser[0]
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    agg_data_key = None
    host_job_meta['agg_data'] = False
    host_job_meta['data_size_bytes'] = data_size_bytes

    if data_size_bytes < wrenconfig.MAX_AGG_DATA_SIZE and data_all_as_one:
        agg_data_key = storage_utils.create_agg_data_key(
            self.storage.prefix, callset_id)
        agg_data_bytes, agg_data_ranges = self.agg_data(data_strs)
        agg_upload_time = time.time()
        self.storage.put_data(agg_data_key, agg_data_bytes)
        host_job_meta['agg_data'] = True
        host_job_meta['data_upload_time'] = time.time() - agg_upload_time
        host_job_meta['data_upload_timestamp'] = time.time()
    else:
        # FIXME add warning that you wanted data all as one but
        # it exceeded max data size
        pass

    # iterate over a copy because matching paths are removed in place
    for module in exclude_modules:
        for mod_path in list(mod_paths):
            if module in mod_path and mod_path in mod_paths:
                mod_paths.remove(mod_path)

    module_data = create_mod_data(mod_paths)
    func_str_encoded = wrenutil.bytes_to_b64str(func_str)

    ### Create func and upload
    func_module_str = json.dumps({'func': func_str_encoded,
                                  'module_data': module_data})
    host_job_meta['func_module_str_len'] = len(func_module_str)

    func_upload_time = time.time()
    func_key = create_func_key(self.storage.prefix, callset_id)
    self.storage.put_func(func_key, func_module_str)
    host_job_meta['func_upload_time'] = time.time() - func_upload_time
    host_job_meta['func_upload_timestamp'] = time.time()

    def invoke(data_str, callset_id, call_id, func_key,
               host_job_meta,
               agg_data_key=None, data_byte_range=None):
        data_key, output_key, status_key \
            = storage_utils.create_keys(self.storage.prefix,
                                        callset_id, call_id)

        host_job_meta['job_invoke_timestamp'] = time.time()

        if agg_data_key is None:
            # no aggregated upload: ship this call's data on its own
            data_upload_time = time.time()
            self.put_data(data_key, data_str,
                          callset_id, call_id)
            data_upload_time = time.time() - data_upload_time
            host_job_meta['data_upload_time'] = data_upload_time
            host_job_meta['data_upload_timestamp'] = time.time()
        else:
            data_key = agg_data_key

        return self.invoke_with_keys(func_key, data_key,
                                     output_key,
                                     status_key,
                                     callset_id, call_id, extra_env,
                                     extra_meta, data_byte_range,
                                     use_cached_runtime, host_job_meta.copy(),
                                     self.job_max_runtime,
                                     overwrite_invoke_args=overwrite_invoke_args)

    N = len(data)
    call_result_objs = []
    pool = ThreadPool(invoke_pool_threads)
    try:
        for i in range(N):
            call_id = "{:05d}".format(i)

            data_byte_range = None
            if agg_data_key is not None:
                data_byte_range = agg_data_ranges[i]

            cb = pool.apply_async(invoke, (data_strs[i], callset_id,
                                           call_id, func_key,
                                           host_job_meta.copy(),
                                           agg_data_key,
                                           data_byte_range))

            logger.info("map {} {} apply async".format(callset_id, call_id))

            call_result_objs.append(cb)

        # ``get`` re-raises any exception thrown by an invocation
        res = [c.get() for c in call_result_objs]
    finally:
        # release the worker threads even when an invocation failed
        pool.close()
        pool.join()
    logger.info("map invoked {} {} pool join".format(callset_id, call_id))

    # FIXME take advantage of the callset to return a lot of these

    # note these are just the invocation futures

    return res
def create_themeball(report, progress=None, abort=None):
    """Optimize the images of a theme and pack them into a theme archive.

    Every file named in ``report.name_map`` (relative to ``report.path``) is
    optimized in a thread pool when it is a PNG or JPEG, then all files are
    stored in a zip, that zip is compressed with ``compress`` and finally
    wrapped, together with the metadata file and a generated cover, in the
    outer zip that is returned.

    :param report: object providing ``path``, ``name`` and ``name_map``.
    :param progress: optional callable ``progress(step, message)``.
    :param abort: optional event-like object; when set the work stops.
    :return: ``(archive_bytes, prefix)``, or ``(None, None)`` when aborted.
    :raises Exception: the first error hit while optimizing is re-raised.
    """
    pool = ThreadPool(processes=cpu_count())
    buf = BytesIO()
    num = count()
    error_occurred = Event()

    def optimize(name):
        if abort is not None and abort.is_set():
            return
        if error_occurred.is_set():
            return
        try:
            i = next(num)
            if progress is not None:
                progress(i, _('Optimizing %s') % name)
            srcpath = os.path.join(report.path, name)
            ext = srcpath.rpartition('.')[-1].lower()
            if ext == 'png':
                optimize_png(srcpath)
            elif ext in ('jpg', 'jpeg'):
                optimize_jpeg(srcpath)
        except Exception:
            # Flag the failure so images not yet started are skipped, and
            # hand the exception info back to the caller's thread.
            error_occurred.set()
            return sys.exc_info()

    try:
        errors = tuple(
            filter(None, pool.map(optimize, tuple(report.name_map))))
    finally:
        pool.close()
        pool.join()
    if abort is not None and abort.is_set():
        # same shape as the other abort exits so callers can always unpack
        return None, None
    if errors:
        e = errors[0]
        reraise(*e)

    if progress is not None:
        progress(next(num), _('Creating theme file'))
    with ZipFile(buf, 'w') as zf:
        for name in report.name_map:
            srcpath = os.path.join(report.path, name)
            with lopen(srcpath, 'rb') as f:
                zf.writestr(name, f.read(), compression=ZIP_STORED)
    buf.seek(0)
    out = BytesIO()
    if abort is not None and abort.is_set():
        return None, None
    if progress is not None:
        progress(next(num), _('Compressing theme file'))
    compress(buf, out, level=9)
    buf = BytesIO()
    prefix = report.name
    if abort is not None and abort.is_set():
        return None, None
    with ZipFile(buf, 'w') as zf:
        with lopen(os.path.join(report.path, THEME_METADATA), 'rb') as f:
            zf.writestr(prefix + '/' + THEME_METADATA, f.read())
        zf.writestr(prefix + '/' + THEME_COVER, create_cover(report))
        # the inner archive is already compressed, so store it as-is
        zf.writestr(prefix + '/' + 'icons.zip.xz', out.getvalue(),
                    compression=ZIP_STORED)
    if progress is not None:
        progress(next(num), _('Finished'))
    return buf.getvalue(), prefix
def main():
    """
    Main entry point for the test suite.

    Parses the command line, configures logging, runs every selected
    regression through ``process_test`` on a thread pool and logs the
    tallies. Exits with status 1 when any test failed, timed out or had an
    unknown result.
    """
    t0 = time.time()
    num_cpus = multiprocessing.cpu_count()
    mem_total = psutil.virtual_memory().total / (1024 * 1024)

    # configure the CLI
    parser = argparse.ArgumentParser()
    parser.add_argument("--exhaustive",
                        help="check all configurations on all examples",
                        action="store_true")
    parser.add_argument("--all-configs",
                        help="check all configurations per example",
                        action="store_true")
    parser.add_argument("--all-examples",
                        help="check all examples",
                        action="store_true")
    parser.add_argument("--folder", action="store", default="**", type=str,
                        help="sets the regressions folder to run")
    parser.add_argument("--threads", action="store", dest="n_threads",
                        default=num_cpus, type=int,
                        help="execute regressions using the selected number of threads in parallel")
    parser.add_argument("--log", action="store", dest="log_level",
                        default="DEBUG", type=str,
                        help="sets the logging level (DEBUG, INFO, WARNING)")
    parser.add_argument("--output-log", action="store", dest="log_path",
                        type=str,
                        help="sets the output log path. (std out by default)")
    args = parser.parse_args()

    if args.exhaustive:
        args.all_examples = True
        args.all_configs = True

    # configure the logging
    log_format = ''
    log_level = logging.DEBUG

    # add more log levels later (if needed)
    if args.log_level.upper() == "INFO":
        log_level = logging.INFO
    elif args.log_level.upper() == "WARNING":
        log_level = logging.WARNING

    # if the user supplied a log path, write the logs to that file.
    # otherwise, write the logs to std out.
    if args.log_path:
        logging.basicConfig(filename=args.log_path, format=log_format,
                            level=log_level)
    else:
        logging.basicConfig(format=log_format, level=log_level)

    logging.debug("Creating Pool with '%d' Workers" % args.n_threads)
    p = ThreadPool(processes=args.n_threads)

    try:
        # start the tests
        logging.info("Running regression tests...")

        # start processing the tests.
        results = []
        for test in sorted(glob.glob("./" + args.folder + "/*.c")):
            # get the meta data for this test
            meta = metadata(test)

            if meta['memory-limit'] > mem_total:
                continue

            if meta['skip'] == True:  # noqa: E712
                continue

            if meta['skip'] != False and not args.all_examples:  # noqa: E712
                continue

            # flags shared by every configuration of this test
            base_cmd = ['smack', test]
            base_cmd += ['--time-limit', str(meta['time-limit'])]
            base_cmd += meta['flags']
            name = path.splitext(path.basename(test))[0]

            # only the first memory model / verifier unless --all-configs
            config_limit = 100 if args.all_configs else 1
            for memory in meta['memory'][:config_limit]:
                for verifier in meta['verifiers'][:config_limit]:
                    # Build a fresh command per configuration so it carries
                    # only its own flags, not those of earlier ones.
                    cmd = base_cmd + [
                        '--mem-mod=' + memory,
                        '--verifier=' + verifier,
                        '-bc', "%s-%s-%s.bc" % (name, memory, verifier),
                        '-bpl', "%s-%s-%s.bpl" % (name, memory, verifier),
                    ]
                    r = p.apply_async(
                        process_test,
                        args=(cmd, test, memory, verifier, meta['expect'],
                              meta['checkbpl'], meta['checkout'],
                              args.log_path,),
                        callback=tally_result)
                    results.append(r)

        # keep the main thread active while there are active workers
        for r in results:
            r.wait()

    except KeyboardInterrupt:
        logging.debug("Caught KeyboardInterrupt, terminating workers")
        p.terminate()  # terminate any remaining workers
        p.join()
    else:
        logging.debug("Quitting normally")
        # close the pool. this prevents any more tasks from being submitted.
        p.close()
        p.join()  # wait for all workers to finish their tasks

    # log the elapsed time
    elapsed_time = time.time() - t0
    logging.info(' ELAPSED TIME [%.2fs]' % round(elapsed_time, 2))

    # log the test results
    logging.info(' PASSED count: %d' % passed)
    logging.info(' FAILED count: %d' % failed)
    logging.info(' TIMEOUT count: %d' % timeouts)
    logging.info(' UNKNOWN count: %d' % unknowns)

    # if there are any failed tests or tests that timed out, set the system
    # exit code to a failure status
    if timeouts > 0 or failed > 0 or unknowns > 0:
        sys.exit(1)
def refresh(self, datasource_names, merge_flag, refreshAll):
    """
    Fetches metadata for the specified datasources and
    merges to the Superset database

    :param datasource_names: names of the datasources to process.
    :param merge_flag: value stored on each processed datasource's
        ``merge_flag`` attribute.
    :param refreshAll: when falsy, datasources that already exist in the
        database are skipped and only new ones are added.
    """
    session = db.session
    ds_list = (
        session.query(DruidDatasource)
        .filter(or_(DruidDatasource.datasource_name == name
                    for name in datasource_names))
    )
    ds_map = {ds.name: ds for ds in ds_list}
    for ds_name in datasource_names:
        datasource = ds_map.get(ds_name, None)
        if not datasource:
            datasource = DruidDatasource(datasource_name=ds_name)
            with session.no_autoflush:
                session.add(datasource)
            flasher(
                'Adding new datasource [{}]'.format(ds_name),
                'success')
            ds_map[ds_name] = datasource
        elif refreshAll:
            flasher(
                'Refreshing datasource [{}]'.format(ds_name),
                'info')
        else:
            # existing datasource and no full refresh requested: skip it
            del ds_map[ds_name]
            continue
        datasource.cluster = self
        datasource.merge_flag = merge_flag
    session.flush()

    # Prepare multithreaded executation
    ds_refresh = list(ds_map.values())
    pool = ThreadPool()
    try:
        metadata = pool.map(_fetch_metadata_for, ds_refresh)
    finally:
        # release the worker threads even when a fetch raised
        pool.close()
        pool.join()

    # ``pool.map`` preserves order, so results pair up with ds_refresh
    for datasource, cols in zip(ds_refresh, metadata):
        if not cols:
            continue
        col_objs_list = (
            session.query(DruidColumn)
            .filter(DruidColumn.datasource_id == datasource.id)
            .filter(or_(DruidColumn.column_name == col for col in cols))
        )
        col_objs = {col.column_name: col for col in col_objs_list}
        for col in cols:
            if col == '__time':  # skip the time column
                continue
            col_obj = col_objs.get(col, None)
            if not col_obj:
                col_obj = DruidColumn(
                    datasource_id=datasource.id,
                    column_name=col)
                with session.no_autoflush:
                    session.add(col_obj)
            datatype = cols[col]['type']
            if datatype == 'STRING':
                col_obj.groupby = True
                col_obj.filterable = True
            if datatype == 'hyperUnique' or datatype == 'thetaSketch':
                col_obj.count_distinct = True
            # Allow sum/min/max for long or double
            if datatype == 'LONG' or datatype == 'DOUBLE':
                col_obj.sum = True
                col_obj.min = True
                col_obj.max = True
            col_obj.type = datatype
            col_obj.datasource = datasource
        datasource.generate_metrics_for(col_objs_list)
    session.commit()
def validate(self, hostname, connection, ignore_no_ocsp=False):
    u"""
    Validates the certificate is not revoked using OCSP

    Each entry of the certificate chain extracted from ``connection`` is
    checked in parallel via ``validate_by_direct_connection_simple``.

    :param hostname: host the certificate chain belongs to.
    :param connection: connection the certificate chain is extracted from.
    :param ignore_no_ocsp: when true, validation is skipped entirely.
    :return: True; every failure is reported by raising.
    :raises OperationalError: if an entry has no OCSP URI, if a check
        fails, or if the number of results does not match the chain.
    """
    global OCSP_VALIDATION_CACHE_UPDATED
    logger.debug(u'validating certificate: %s', hostname)
    if ignore_no_ocsp:
        logger.debug(u'validation was skipped.')
        return True

    if hostname in KNOWN_HOSTNAMES:  # skip OCSP validation if known
        logger.debug(
            'validation was skipped, because hostname %s is known',
            hostname)
        return True

    cert_data = _extract_certificate_chain(connection)

    pool = ThreadPool(len(cert_data))
    results = []
    try:
        for issuer_and_subject in cert_data:
            ocsp_uri = issuer_and_subject['subject'][
                'ocsp_uri']  # issuer's ocsp uri
            ocsp_subject = issuer_and_subject['subject']
            ocsp_issuer = issuer_and_subject['issuer']
            logger.debug('ocsp_uri: %s', ocsp_uri)
            if ocsp_uri:
                r = pool.apply_async(
                    self.validate_by_direct_connection_simple,
                    [ocsp_uri, ocsp_issuer, ocsp_subject])
                results.append(r)
            else:
                raise OperationalError(
                    msg=(u'NO OCSP URI was found: '
                         u'hostname={0}, subject={1}').format(
                        hostname, ocsp_subject),
                    errno=ER_FAILED_TO_GET_OCSP_URI,
                )
    finally:
        pool.close()
        pool.join()

    # The pool has been joined, so every result is ready here.
    for r in results:
        if not r.successful():
            # ``get`` re-raises the worker's exception. Errors that are
            # already OperationalError propagate unchanged (keeping their
            # errno); anything else is wrapped with a formatted message.
            try:
                r.get()
            except OperationalError:
                raise
            except Exception as err:
                raise OperationalError(
                    msg=(u'Failed to validate the certificate '
                         u'revocation status: '
                         u'hostname={0}, err={1}').format(hostname, err))

    with OCSP_VALIDATION_CACHE_LOCK:
        if OCSP_VALIDATION_CACHE_UPDATED:
            update_ocsp_response_cache_file(
                self._ocsp_response_cache_uri)
        OCSP_VALIDATION_CACHE_UPDATED = False

    if len(results) != len(cert_data):
        raise OperationalError(
            msg=u"Failed to validate the certificate "
                u"revocation status. The number of validation "
                u"didn't match: hostname={0}, retsults={1}, "
                u"cert_data={2}".format(hostname, len(results),
                                        len(cert_data)),
            errno=ER_INVALID_OCSP_RESPONSE)
    logger.debug(u'ok')
    # any failure must be an exception
    return True
class ErrBot(Backend, StoreMixin):
    """
    ErrBot is the layer taking care of command management and dispatching.

    It keeps the registries of plain commands, regex commands, command
    filters and flows, parses incoming messages into commands, executes
    them (optionally on a thread pool) and forwards callbacks to the
    active plugins.
    """
    __errdoc__ = """ Commands related to the bot administration """
    MSG_ERROR_OCCURRED = 'Computer says nooo. See logs for details'
    MSG_UNKNOWN_COMMAND = 'Unknown command: "%(command)s". '
    startup_time = datetime.now()

    def __init__(self, bot_config):
        log.debug("ErrBot init.")
        super().__init__(bot_config)
        self.bot_config = bot_config
        self.prefix = bot_config.BOT_PREFIX
        if bot_config.BOT_ASYNC:
            self.thread_pool = ThreadPool(bot_config.BOT_ASYNC_POOLSIZE)
            log.debug('created a thread pool of size %d.', bot_config.BOT_ASYNC_POOLSIZE)
        self.commands = {}  # the dynamically populated list of commands available on the bot
        self.re_commands = {}  # the dynamically populated list of regex-based commands available on the bot
        self.command_filters = []  # the dynamically populated list of filters
        self.MSG_UNKNOWN_COMMAND = 'Unknown command: "%(command)s". ' \
                                   'Type "' + bot_config.BOT_PREFIX + 'help" for available commands.'
        if bot_config.BOT_ALT_PREFIX_CASEINSENSITIVE:
            self.bot_alt_prefixes = tuple(prefix.lower() for prefix in bot_config.BOT_ALT_PREFIXES)
        else:
            self.bot_alt_prefixes = bot_config.BOT_ALT_PREFIXES
        self.repo_manager = None
        self.plugin_manager = None
        self.storage_plugin = None
        self._plugin_errors_during_startup = None
        self.flow_executor = FlowExecutor(self)
        self._gbl = RLock()  # this protects internal structures of this class

    def attach_repo_manager(self, repo_manager):
        self.repo_manager = repo_manager

    def attach_plugin_manager(self, plugin_manager):
        self.plugin_manager = plugin_manager

    def attach_storage_plugin(self, storage_plugin):
        # the storage_plugin is needed by the plugins
        self.storage_plugin = storage_plugin

    def initialize_backend_storage(self):
        """
        Initialize storage for the backend to use.
        """
        log.debug("Initializing backend storage")
        assert self.plugin_manager is not None
        assert self.storage_plugin is not None
        self.open_storage(self.storage_plugin, f'{self.mode}_backend')

    @property
    def all_commands(self):
        """Return both commands and re_commands together."""
        with self._gbl:
            newd = dict(**self.commands)
            newd.update(self.re_commands)
        return newd

    def _dispatch_to_plugins(self, method, *args, **kwargs):
        """
        Dispatch the given method to all active plugins.

        Will catch and log any exceptions that occur.

        :param method: The name of the function to dispatch.
        :param *args: Passed to the callback function.
        :param **kwargs: Passed to the callback function.
        """
        for plugin in self.plugin_manager.get_all_active_plugins():
            plugin_name = plugin.name
            log.debug('Triggering %s on %s.', method, plugin_name)
            # noinspection PyBroadException
            try:
                getattr(plugin, method)(*args, **kwargs)
            except Exception:
                log.exception('%s on %s crashed.', method, plugin_name)

    def send(self, identifier, text, in_reply_to=None, groupchat_nick_reply=False):
        """ Sends a simple message to the specified user.

            :param identifier:
                an identifier from build_identifier or from an incoming message
            :param in_reply_to:
                the original message the bot is answering from
            :param text:
                the markdown text you want to send
            :param groupchat_nick_reply:
                authorized the prefixing with the nick form the user
            :raises ValueError: if identifier is not an Identifier
        """
        # protect a little bit the backends here
        if not isinstance(identifier, Identifier):
            raise ValueError("identifier should be an Identifier")

        msg = self.build_message(text)
        msg.to = identifier
        msg.frm = in_reply_to.to if in_reply_to else self.bot_identifier
        msg.parent = in_reply_to

        nick_reply = self.bot_config.GROUPCHAT_NICK_PREFIXED
        if isinstance(identifier, Room) and in_reply_to and (nick_reply or groupchat_nick_reply):
            self.prefix_groupchat_reply(msg, in_reply_to.frm)

        self.split_and_send_message(msg)

    def send_templated(self, identifier, template_name, template_parameters, in_reply_to=None,
                       groupchat_nick_reply=False):
        """ Sends a simple message to the specified user using a template.

            :param template_parameters: the parameters for the template.
            :param template_name: the template name you want to use.
            :param identifier:
                an identifier from build_identifier or from an incoming message, a room etc.
            :param in_reply_to:
                the original message the bot is answering from
            :param groupchat_nick_reply:
                authorized the prefixing with the nick form the user
        """
        text = self.process_template(template_name, template_parameters)
        return self.send(identifier, text, in_reply_to, groupchat_nick_reply)

    def split_and_send_message(self, msg):
        """Send msg in parts no larger than bot_config.MESSAGE_SIZE_LIMIT."""
        for part in split_string_after(msg.body, self.bot_config.MESSAGE_SIZE_LIMIT):
            partial_message = msg.clone()
            partial_message.body = part
            partial_message.partial = True
            self.send_message(partial_message)

    def send_message(self, msg):
        """
        This needs to be overridden by the backends with a super() call.

        :param msg: the message to send.
        :return: None
        """
        for bot in self.plugin_manager.get_all_active_plugins():
            # noinspection PyBroadException
            try:
                bot.callback_botmessage(msg)
            except Exception:
                log.exception("Crash in a callback_botmessage handler")

    def send_card(self, card):
        """
        Sends a card, this can be overridden by the backends *without* a super() call.

        :param card: the card to send.
        :return: None
        """
        self.send_templated(card.to, 'card', {'card': card})

    def send_simple_reply(self, msg, text, private=False, threaded=False):
        """Send a simple response to a given incoming message

        :param private: if True will force a response in private.
        :param threaded: if True and if the backend supports it, sends the response in a threaded message.
        :param text: the markdown text of the message.
        :param msg: the message you are replying to.
        """
        reply = self.build_reply(msg, text, private=private, threaded=threaded)
        if isinstance(reply.to, Room) and self.bot_config.GROUPCHAT_NICK_PREFIXED:
            self.prefix_groupchat_reply(reply, msg.frm)
        self.split_and_send_message(reply)

    def process_message(self, msg):
        """Check if the given message is a command for the bot and act on it.

        :param msg: the incoming message.
        :return: True when the message may be broadcast to the plugins'
            callback_message, False when it must be ignored (delayed
            message, message from the bot itself, or a history recall
            that cannot be satisfied).
        :raises Exception: if msg.frm has no "person" attribute.
        """
        # Prepare to handle either private chats or group chats
        frm = msg.frm
        text = msg.body
        if not hasattr(msg.frm, 'person'):
            raise Exception(f'msg.frm not an Identifier as it misses the "person" property.'
                            f' Class of frm : {msg.frm.__class__}.')

        username = msg.frm.person
        user_cmd_history = self.cmd_history[username]

        if msg.delayed:
            log.debug('Message from history, ignore it.')
            return False

        if self.is_from_self(msg):
            log.debug("Ignoring message from self.")
            return False

        log.debug('*** frm = %s', frm)
        log.debug('*** username = %s', username)
        log.debug('*** text = %s', text)

        suppress_cmd_not_found = self.bot_config.SUPPRESS_CMD_NOT_FOUND

        prefixed = False  # Keeps track whether text was prefixed with a bot prefix
        only_check_re_command = False  # Becomes true if text is determed to not be a regular command
        tomatch = text.lower() if self.bot_config.BOT_ALT_PREFIX_CASEINSENSITIVE else text
        if len(self.bot_config.BOT_ALT_PREFIXES) > 0 and tomatch.startswith(self.bot_alt_prefixes):
            # Yay! We were called by one of our alternate prefixes. Now we just have to find out
            # which one... (And find the longest matching, in case you have 'err' and 'errbot' and
            # someone uses 'errbot', which also matches 'err' but would leave 'bot' to be taken as
            # part of the called command in that case)
            prefixed = True
            longest = 0
            for prefix in self.bot_alt_prefixes:
                length = len(prefix)
                if tomatch.startswith(prefix) and length > longest:
                    longest = length
            log.debug('Called with alternate prefix "%s"', text[:longest])
            text = text[longest:]

            # Now also remove the separator from the text
            for sep in self.bot_config.BOT_ALT_PREFIX_SEPARATORS:
                # While unlikely, one may have separators consisting of
                # more than one character
                length = len(sep)
                if text[:length] == sep:
                    text = text[length:]
        elif msg.is_direct and self.bot_config.BOT_PREFIX_OPTIONAL_ON_CHAT:
            log.debug('Assuming "%s" to be a command because BOT_PREFIX_OPTIONAL_ON_CHAT is True', text)
            # In order to keep noise down we surpress messages about the command
            # not being found, because it's possible a plugin will trigger on what
            # was said with trigger_message.
            suppress_cmd_not_found = True
        elif not text.startswith(self.bot_config.BOT_PREFIX):
            only_check_re_command = True
        if text.startswith(self.bot_config.BOT_PREFIX):
            text = text[len(self.bot_config.BOT_PREFIX):]
            prefixed = True

        text = text.strip()
        text_split = text.split(' ')
        cmd = None
        command = None
        args = ''
        if not only_check_re_command:
            # Try the longest underscore-joined word sequence first, then
            # progressively shorter ones; the remaining words are the args.
            i = len(text_split)
            while cmd is None:
                command = '_'.join(text_split[:i])

                with self._gbl:
                    if command in self.commands:
                        cmd = command
                        args = ' '.join(text_split[i:])
                    else:
                        i -= 1
                if i == 0:
                    break

            if command == self.bot_config.BOT_PREFIX:  # we did "!!" so recall the last command
                if len(user_cmd_history):
                    cmd, args = user_cmd_history[-1]
                else:
                    return False  # no command in history
            elif command.isdigit():  # we did "!#" so we recall the specified command
                index = int(command)
                # index 0 is rejected: [-0] would address the *first* entry
                # and raise IndexError on an empty history.
                if 0 < index <= len(user_cmd_history):
                    cmd, args = user_cmd_history[-index]
                else:
                    return False  # no command in history

        # Try to match one of the regex commands if the regular commands produced no match
        matched_on_re_command = False
        if not cmd:
            with self._gbl:
                if prefixed or (msg.is_direct and self.bot_config.BOT_PREFIX_OPTIONAL_ON_CHAT):
                    commands = dict(self.re_commands)
                else:
                    commands = {k: self.re_commands[k] for k in self.re_commands
                                if not self.re_commands[k]._err_command_prefix_required}

            for name, func in commands.items():
                if func._err_command_matchall:
                    match = list(func._err_command_re_pattern.finditer(text))
                else:
                    match = func._err_command_re_pattern.search(text)
                if match:
                    log.debug('Matching "%s" against "%s" produced a match.',
                              text, func._err_command_re_pattern.pattern)
                    matched_on_re_command = True
                    self._process_command(msg, name, text, match)
                else:
                    log.debug('Matching "%s" against "%s" produced no match.',
                              text, func._err_command_re_pattern.pattern)
            if matched_on_re_command:
                return True

        if cmd:
            self._process_command(msg, cmd, args, match=None)
        elif not only_check_re_command:
            log.debug("Command not found")
            for cmd_filter in self.command_filters:
                if getattr(cmd_filter, 'catch_unprocessed', False):
                    try:
                        reply = cmd_filter(msg, cmd, args, False, emptycmd=True)
                        if reply:
                            self.send_simple_reply(msg, reply)
                        # continue processing the other unprocessed cmd filters.
                    except Exception:
                        log.exception("Exception in a command filter command.")
        return True

    def _process_command_filters(self, msg, cmd, args, dry_run=False):
        """Run msg/cmd/args through every command filter in order.

        :return: the (msg, cmd, args) triple produced by the last filter,
            or (None, None, None) when a filter blocked the command or
            raised.
        """
        try:
            for cmd_filter in self.command_filters:
                msg, cmd, args = cmd_filter(msg, cmd, args, dry_run)
                if msg is None:
                    return None, None, None
            return msg, cmd, args
        except Exception:
            log.exception("Exception in a filter command, blocking the command in doubt")
            return None, None, None

    def _process_command(self, msg, cmd, args, match):
        """Process and execute a bot command"""

        # first it must go through the command filters
        msg, cmd, args = self._process_command_filters(msg, cmd, args, False)
        if msg is None:
            log.info('Command %s blocked or deferred.', cmd)
            return

        frm = msg.frm
        username = frm.person
        user_cmd_history = self.cmd_history[username]

        log.info(f'Processing command "{cmd}" with parameters "{args}" from {frm}')

        if (cmd, args) in user_cmd_history:
            user_cmd_history.remove((cmd, args))  # Avoids duplicate history items

        with self._gbl:
            f = self.re_commands[cmd] if match else self.commands[cmd]

        if f._err_command_admin_only and self.bot_config.BOT_ASYNC:
            # If it is an admin command, wait until the queue is completely depleted so
            # we don't have strange concurrency issues on load/unload/updates etc...
            self.thread_pool.close()
            self.thread_pool.join()
            self.thread_pool = ThreadPool(self.bot_config.BOT_ASYNC_POOLSIZE)

        if f._err_command_historize:
            user_cmd_history.append((cmd, args))  # add it to the history only if it is authorized to be so

        # Don't check for None here as None can be a valid argument to str.split.
        # '' was chosen as default argument because this isn't a valid argument to str.split()
        if not match and f._err_command_split_args_with != '':
            try:
                if hasattr(f._err_command_split_args_with, "parse_args"):
                    args = f._err_command_split_args_with.parse_args(args)
                elif callable(f._err_command_split_args_with):
                    args = f._err_command_split_args_with(args)
                else:
                    args = args.split(f._err_command_split_args_with)
            except Exception as e:
                self.send_simple_reply(msg, f"Sorry, I couldn't parse your arguments. {e}")
                return

        if self.bot_config.BOT_ASYNC:
            result = self.thread_pool.apply_async(
                self._execute_and_send,
                [],
                {'cmd': cmd, 'args': args, 'match': match, 'msg': msg,
                 'template_name': f._err_command_template}
            )
            if f._err_command_admin_only:
                # Again, if it is an admin command, wait until the queue is completely
                # depleted so we don't have strange concurrency issues.
                result.wait()
        else:
            self._execute_and_send(cmd=cmd, args=args, match=match, msg=msg,
                                   template_name=f._err_command_template)

    @staticmethod
    def process_template(template_name, template_parameters):
        """Render template_parameters with template_name, or stringify them.

        :param template_name: name of the template (without the '.md'
            suffix), or a falsy value for no templating.
        :param template_parameters: the answer of the command.
        :return: the rendered template when a template name is given and
            the parameters are a mapping, else str(template_parameters).
        """
        # collections.Mapping was removed in Python 3.10; the ABC lives in
        # collections.abc. Imported locally so the block is self-contained.
        from collections.abc import Mapping

        # integrated templating
        # The template needs to be set and the answer from the user command needs to be a mapping
        # If not just convert the answer to string.
        if template_name and isinstance(template_parameters, Mapping):
            return tenv().get_template(template_name + '.md').render(**template_parameters)

        # Reply should be all text at this point (See https://github.com/errbotio/errbot/issues/96)
        return str(template_parameters)

    def _execute_and_send(self, cmd, args, match, msg, template_name=None):
        """Execute a bot command and send output back to the caller

        :param cmd: The command that was given to the bot (after being expanded)
        :param args: Arguments given along with cmd
        :param match: A re.MatchObject if command is coming from a regex-based command, else None
        :param msg: The message object
        :param template_name: The name of the jinja template which should be used to render
            the markdown output, if any

        """
        private = cmd in self.bot_config.DIVERT_TO_PRIVATE
        threaded = cmd in self.bot_config.DIVERT_TO_THREAD
        commands = self.re_commands if match else self.commands
        try:
            with self._gbl:
                method = commands[cmd]
            # first check if we need to reattach a flow context
            flow, _ = self.flow_executor.check_inflight_flow_triggered(cmd, msg.frm)
            if flow:
                log.debug("Reattach context from flow %s to the message", flow._root.name)
                msg.ctx = flow.ctx
            elif method._err_command_flow_only:
                # check if it is a flow_only command but we are not in a flow.
                log.debug("%s is tagged flow_only and we are not in a flow. Ignores the command.", cmd)
                return

            if inspect.isgeneratorfunction(method):
                replies = method(msg, match) if match else method(msg, args)
                for reply in replies:
                    if reply:
                        self.send_simple_reply(msg, self.process_template(template_name, reply),
                                               private, threaded)
            else:
                reply = method(msg, match) if match else method(msg, args)
                if reply:
                    self.send_simple_reply(msg, self.process_template(template_name, reply),
                                           private, threaded)

            # The command is a success, check if this has not made a flow progressed
            self.flow_executor.trigger(cmd, msg.frm, msg.ctx)

        except CommandError as command_error:
            reason = command_error.reason
            if command_error.template:
                reason = self.process_template(command_error.template, reason)
            self.send_simple_reply(msg, reason, private, threaded)

        except Exception as e:
            tb = traceback.format_exc()
            log.exception(f'An error happened while processing a message ("{msg.body}"): {tb}"')
            self.send_simple_reply(msg, self.MSG_ERROR_OCCURRED + f':\n{e}', private, threaded)

    def unknown_command(self, _, cmd, args):
        """ Override the default unknown command behavior
        """
        full_cmd = cmd + ' ' + args.split(' ')[0] if args else None
        if full_cmd:
            msg = f'Command "{cmd}" / "{full_cmd}" not found.'
        else:
            msg = f'Command "{cmd}" not found.'
        ununderscore_keys = [m.replace('_', ' ') for m in self.commands.keys()]
        matches = difflib.get_close_matches(cmd, ununderscore_keys)
        if full_cmd:
            matches.extend(difflib.get_close_matches(full_cmd, ununderscore_keys))
        matches = set(matches)
        if matches:
            alternatives = ('" or "' + self.bot_config.BOT_PREFIX).join(matches)
            msg += f'\n\nDid you mean "{self.bot_config.BOT_PREFIX}{alternatives}" ?'
        return msg

    def inject_commands_from(self, instance_to_inject):
        """Register every method of instance_to_inject flagged as a command.

        A command whose name is already registered is renamed to
        "<plugin name>-<command name>" (lower-cased) and the admins are
        warned about the clash.
        """
        with self._gbl:
            plugin_name = instance_to_inject.name
            for name, value in inspect.getmembers(instance_to_inject, inspect.ismethod):
                if getattr(value, '_err_command', False):
                    commands = self.re_commands if getattr(value, '_err_re_command') else self.commands
                    name = getattr(value, '_err_command_name')

                    if name in commands:
                        f = commands[name]
                        new_name = (plugin_name + '-' + name).lower()
                        self.warn_admins(
                            f'{plugin_name}.{name} clashes with {type(f.__self__).__name__}.{f.__name__} '
                            f'so it has been renamed {new_name}')
                        name = new_name
                        value.__func__._err_command_name = new_name  # To keep track of the renaming.
                    commands[name] = value

                    if getattr(value, '_err_re_command'):
                        log.debug('Adding regex command : %s -> %s.', name, value.__name__)
                        self.re_commands = commands
                    else:
                        log.debug('Adding command : %s -> %s.', name, value.__name__)
                        self.commands = commands

    def inject_flows_from(self, instance_to_inject):
        """Build and register a flow for every method flagged as a flow."""
        classname = instance_to_inject.__class__.__name__
        for name, method in inspect.getmembers(instance_to_inject, inspect.ismethod):
            if getattr(method, '_err_flow', False):
                log.debug('Found new flow %s: %s', classname, name)
                flow = FlowRoot(name, method.__doc__)
                try:
                    method(flow)
                except Exception:
                    log.exception("Exception initializing a flow")

                self.flow_executor.add_flow(flow)

    def inject_command_filters_from(self, instance_to_inject):
        """Register every method flagged as a command filter."""
        with self._gbl:
            for name, method in inspect.getmembers(instance_to_inject, inspect.ismethod):
                if getattr(method, '_err_command_filter', False):
                    log.debug('Adding command filter: %s', name)
                    self.command_filters.append(method)

    def remove_flows_from(self, instance_to_inject):
        """Log the flows of instance_to_inject; removal is not implemented."""
        for name, value in inspect.getmembers(instance_to_inject, inspect.ismethod):
            if getattr(value, '_err_flow', False):
                log.debug('Remove flow %s', name)
                # TODO(gbin)

    def remove_commands_from(self, instance_to_inject):
        """Unregister the commands provided by instance_to_inject."""
        with self._gbl:
            for name, value in inspect.getmembers(instance_to_inject, inspect.ismethod):
                if getattr(value, '_err_command', False):
                    name = getattr(value, '_err_command_name')
                    if getattr(value, '_err_re_command') and name in self.re_commands:
                        del self.re_commands[name]
                    elif not getattr(value, '_err_re_command') and name in self.commands:
                        del self.commands[name]

    def remove_command_filters_from(self, instance_to_inject):
        """Unregister the command filters provided by instance_to_inject."""
        with self._gbl:
            for name, method in inspect.getmembers(instance_to_inject, inspect.ismethod):
                if getattr(method, '_err_command_filter', False):
                    log.debug('Removing command filter: %s', name)
                    self.command_filters.remove(method)

    def _admins_to_notify(self):
        """
        Creates a list of administrators to notify
        """
        admins_to_notify = self.bot_config.BOT_ADMINS_NOTIFICATIONS
        return admins_to_notify

    def warn_admins(self, warning: str) -> None:
        """
        Send a warning to the administrators of the bot.

        :param warning: The markdown-formatted text of the message to send.
        """
        for admin in self._admins_to_notify():
            self.send(self.build_identifier(admin), warning)
        log.warning(warning)

    def callback_message(self, msg):
        """Processes for commands and dispatches the message to all the plugins."""
        if self.process_message(msg):
            # Act only in the backend tells us that this message is OK to broadcast
            self._dispatch_to_plugins('callback_message', msg)

    def callback_mention(self, msg, people):
        log.debug("%s has/have been mentioned", ', '.join(str(p) for p in people))
        self._dispatch_to_plugins('callback_mention', msg, people)

    def callback_presence(self, pres):
        self._dispatch_to_plugins('callback_presence', pres)

    def callback_room_joined(self, room):
        """
            Triggered when the bot has joined a MUC.

            :param room:
                An instance of :class:`~errbot.backends.base.MUCRoom`
                representing the room that was joined.
        """
        self._dispatch_to_plugins('callback_room_joined', room)

    def callback_room_left(self, room):
        """
            Triggered when the bot has left a MUC.

            :param room:
                An instance of :class:`~errbot.backends.base.MUCRoom`
                representing the room that was left.
        """
        self._dispatch_to_plugins('callback_room_left', room)

    def callback_room_topic(self, room):
        """
            Triggered when the topic in a MUC changes.

            :param room:
                An instance of :class:`~errbot.backends.base.MUCRoom`
                representing the room for which the topic changed.
        """
        self._dispatch_to_plugins('callback_room_topic', room)

    def callback_stream(self, stream):
        log.info('Initiated an incoming transfer %s.', stream)
        Tee(stream, self.plugin_manager.get_all_active_plugins()).start()

    def signal_connect_to_all_plugins(self):
        """Call callback_connect on every active plugin that defines it."""
        for bot in self.plugin_manager.get_all_active_plugins():
            if hasattr(bot, 'callback_connect'):
                # noinspection PyBroadException
                try:
                    log.debug('Trigger callback_connect on %s.', bot.__class__.__name__)
                    bot.callback_connect()
                except Exception:
                    log.exception(f'callback_connect failed for {bot}.')

    def connect_callback(self):
        log.info('Activate internal commands')
        if self._plugin_errors_during_startup:
            errors = f'Some plugins failed to start during bot startup:\n\n{self._plugin_errors_during_startup}'
        else:
            errors = ''
        errors += self.plugin_manager.activate_non_started_plugins()
        if errors:
            self.warn_admins(errors)
            log.info(errors)
        log.info('Notifying connection to all the plugins...')
        self.signal_connect_to_all_plugins()
        log.info('Plugin activation done.')

    def disconnect_callback(self):
        log.info('Disconnect callback, deactivating all the plugins.')
        self.plugin_manager.deactivate_all_plugins()

    def get_doc(self, command):
        """Get command documentation

        When the bot prefix is not '!', every "!<command>" occurrence in
        the docstring is rewritten to use the configured prefix.

        :param command: the command callable whose docstring is returned.
        :return: the docstring, or '(undocumented)' if there is none.
        """
        if not command.__doc__:
            return '(undocumented)'
        if self.prefix == '!':
            return command.__doc__
        ununderscore_keys = (m.replace('_', ' ') for m in self.all_commands.keys())
        pat = re.compile(fr'!({"|".join(ununderscore_keys)})')
        # A callable replacement inserts the matched command name verbatim.
        # The former '\1' (non-raw) was the control character \x01, not a
        # backreference, and a template string would also misinterpret
        # backslashes contained in the prefix.
        return pat.sub(lambda found: self.prefix + found.group(1), command.__doc__)

    @staticmethod
    def get_plugin_class_from_method(meth):
        """Return the class in meth's MRO that defines it, or None."""
        for cls in inspect.getmro(type(meth.__self__)):
            if meth.__name__ in cls.__dict__:
                return cls
        return None

    def get_command_classes(self):
        return (self.get_plugin_class_from_method(command)
                for command in self.all_commands.values())

    def shutdown(self):
        self.close_storage()
        self.plugin_manager.shutdown()
        self.repo_manager.shutdown()

    def prefix_groupchat_reply(self, message: Message, identifier: Identifier):
        if message.body.startswith('#'):
            # Markdown heading, insert an extra newline to ensure the
            # markdown rendering doesn't break.
            message.body = "\n" + message.body
# print(r.status_code) # 返回状态码 t = time.time() if r.status_code == 200: aliyunoss.upload_file(path, r.content) print(path) # with open(path, 'wb')as f: # f.write(r.content) # f.close() # 将内容写入图片 # print(time.time()-t) return if __name__ == '__main__': with open('dongtaitu.json', 'r') as f: j = json.load(f) p = ThreadPool(200) for i in j: #print(i, i['src']) p.apply_async(func=download_img, args=(i['src'], 'gif/{}.gif'.format(i['title']))) # break # download_img(i['src'],'./gif/{}.gif'.format(i['title'])) #break p.close() p.join() print('done')
class SnowflakeChunkDownloader(object):
    u"""
    Large Result set chunk downloader class.

    Chunks are fetched ahead of consumption on a thread pool;
    ``next_chunk`` hands them out in order and schedules further
    downloads as chunks are consumed.
    """

    def _pre_init(self, chunks, connection, cursor, qrmk, chunk_headers,
                  query_result_format='JSON',
                  prefetch_threads=DEFAULT_CLIENT_PREFETCH_THREADS,
                  use_ijson=False):
        """
        Initializes the downloader state and creates the thread pool
        without scheduling any download.
        """
        self._use_ijson = use_ijson
        self._query_result_format = query_result_format

        self._downloader_error = None

        self._connection = connection
        self._cursor = cursor
        self._qrmk = qrmk
        self._chunk_headers = chunk_headers

        self._chunk_size = len(chunks)
        self._chunks = {}
        self._chunk_cond = Condition()

        # The pool needs at least one worker even when there is no chunk.
        self._effective_threads = min(prefetch_threads, self._chunk_size)
        if self._effective_threads < 1:
            self._effective_threads = 1

        for idx, chunk in enumerate(chunks):
            logger.debug(u"queued chunk %d: rowCount=%s", idx,
                         chunk[u'rowCount'])
            self._chunks[idx] = SnowflakeChunk(
                url=chunk[u'url'],
                result_data=None,
                ready=False,
                row_count=int(chunk[u'rowCount']))

        logger.debug(u'prefetch threads: %s, '
                     u'number of chunks: %s, '
                     u'effective threads: %s',
                     prefetch_threads,
                     self._chunk_size,
                     self._effective_threads)

        self._pool = ThreadPool(self._effective_threads)

        self._downloading_chunks_lock = Lock()
        self._total_millis_downloading_chunks = 0
        self._total_millis_parsing_chunks = 0

        self._next_chunk_to_consume = 0

    def __init__(self, chunks, connection, cursor, qrmk, chunk_headers,
                 query_result_format='JSON',
                 prefetch_threads=DEFAULT_CLIENT_PREFETCH_THREADS,
                 use_ijson=False):
        self._pre_init(chunks, connection, cursor, qrmk, chunk_headers,
                       query_result_format=query_result_format,
                       prefetch_threads=prefetch_threads,
                       use_ijson=use_ijson)
        logger.debug('Chunk Downloader in memory')
        # Never schedule an index that has no chunk: _effective_threads is
        # forced to 1 even when the chunk list is empty.
        for idx in range(min(self._effective_threads, self._chunk_size)):
            self._pool.apply_async(self._download_chunk, [idx])
        self._next_chunk_to_download = self._effective_threads

    def _download_chunk(self, idx):
        """
        Downloads a chunk asynchronously

        On success the chunk is marked ready and waiters on the chunk
        condition are notified; on failure the exception is stored in
        ``_downloader_error`` for ``next_chunk`` to raise.
        """
        logger.debug(u'downloading chunk %s/%s', idx + 1, self._chunk_size)
        headers = {}
        try:
            if self._chunk_headers is not None:
                headers = self._chunk_headers
                logger.debug(u'use chunk headers from result')
            elif self._qrmk is not None:
                headers[SSE_C_ALGORITHM] = SSE_C_AES
                headers[SSE_C_KEY] = self._qrmk

            logger.debug(u"started getting the result set %s: %s",
                         idx + 1, self._chunks[idx].url)
            result_data = self._fetch_chunk(self._chunks[idx].url, headers)
            logger.debug(u"finished getting the result set %s: %s",
                         idx + 1, self._chunks[idx].url)

            if isinstance(result_data, ResultIterWithTimings):
                metrics = result_data.get_timings()
                with self._downloading_chunks_lock:
                    self._total_millis_downloading_chunks += metrics[
                        ResultIterWithTimings.DOWNLOAD]
                    self._total_millis_parsing_chunks += metrics[
                        ResultIterWithTimings.PARSE]

            with self._chunk_cond:
                self._chunks[idx] = self._chunks[idx]._replace(
                    result_data=result_data,
                    ready=True)
                self._chunk_cond.notify_all()
                logger.debug(
                    u'added chunk %s/%s to a chunk list.', idx + 1,
                    self._chunk_size)
        except Exception as e:
            logger.exception(
                u'Failed to fetch the large result set chunk %s/%s',
                idx + 1, self._chunk_size)
            self._downloader_error = e

    def next_chunk(self):
        """
        Gets the next chunk if ready

        Releases the data of the previously consumed chunk, schedules one
        more download if any chunk is left, then waits for the next chunk
        to become ready. If it does not become ready within the wait
        budget, the pool is restarted and the pending chunks resubmitted,
        up to MAX_RETRY_DOWNLOAD attempts.

        :return: the next chunk
        :raises Exception: the error stored by a failed download task
        """
        logger.debug(
            u'next_chunk_to_consume={next_chunk_to_consume}, '
            u'next_chunk_to_download={next_chunk_to_download}, '
            u'total_chunks={total_chunks}'.format(
                next_chunk_to_consume=self._next_chunk_to_consume + 1,
                next_chunk_to_download=self._next_chunk_to_download + 1,
                total_chunks=self._chunk_size))
        if self._next_chunk_to_consume > 0:
            # clean up the previously fetched data
            n = self._next_chunk_to_consume - 1
            self._chunks[n] = self._chunks[n]._replace(
                result_data=None, ready=False)

            if self._next_chunk_to_download < self._chunk_size:
                self._pool.apply_async(
                    self._download_chunk,
                    [self._next_chunk_to_download])
                self._next_chunk_to_download += 1

        if self._downloader_error is not None:
            raise self._downloader_error

        for attempt in range(MAX_RETRY_DOWNLOAD):
            logger.debug(u'waiting for chunk %s/%s'
                         u' in %s/%s download attempt',
                         self._next_chunk_to_consume + 1,
                         self._chunk_size,
                         attempt + 1,
                         MAX_RETRY_DOWNLOAD)
            done = False
            for wait_counter in range(MAX_WAIT):
                with self._chunk_cond:
                    if self._downloader_error:
                        raise self._downloader_error
                    if self._chunks[self._next_chunk_to_consume].ready:
                        done = True
                        break
                    logger.debug(u'chunk %s/%s is NOT ready to consume'
                                 u' in %s/%s(s)',
                                 self._next_chunk_to_consume + 1,
                                 self._chunk_size,
                                 (wait_counter + 1) * WAIT_TIME_IN_SECONDS,
                                 MAX_WAIT * WAIT_TIME_IN_SECONDS)
                    self._chunk_cond.wait(WAIT_TIME_IN_SECONDS)
            else:
                logger.debug(
                    u'chunk %s/%s is still NOT ready. Restarting chunk '
                    u'downloader threads',
                    self._next_chunk_to_consume + 1,
                    self._chunk_size)
                self._pool.terminate()  # terminate the thread pool
                self._pool = ThreadPool(self._effective_threads)
                # Resubmit from the chunk being waited for, but never past
                # the last chunk: an out-of-range index would fail inside
                # _download_chunk and poison _downloader_error.
                last = min(
                    self._next_chunk_to_consume + self._effective_threads,
                    self._chunk_size)
                for idx in range(self._next_chunk_to_consume, last):
                    self._pool.apply_async(self._download_chunk, [idx])
            if done:
                break
        else:
            Error.errorhandler_wrapper(
                self._connection,
                self._cursor,
                OperationalError,
                {
                    u'msg': u'The result set chunk download fails or hang for '
                            u'unknown reason.',
                    u'errno': ER_CHUNK_DOWNLOAD_FAILED
                })
        logger.debug(u'chunk %s/%s is ready to consume',
                     self._next_chunk_to_consume + 1,
                     self._chunk_size)

        ret = self._chunks[self._next_chunk_to_consume]
        self._next_chunk_to_consume += 1
        return ret

    def terminate(self):
        """
        Terminates downloading the chunks.
        """
        if hasattr(self, u'_pool') and self._pool is not None:
            self._pool.close()
            self._pool.join()
            self._pool = None

    def __del__(self):
        try:
            self.terminate()
        except Exception:
            # ignore all errors in the destructor
            pass

    def _fetch_chunk(self, url, headers):
        """
        Fetch the chunk from S3.

        The JSON handler is used when the query result format is "json"
        in any letter case (the constructor default is 'JSON'); any other
        format uses the Arrow handler.
        """
        is_json = (self._query_result_format or u'').lower() == u'json'
        if is_json:
            handler = JsonBinaryHandler(is_raw_binary_iterator=True,
                                        use_ijson=self._use_ijson)
        else:
            handler = ArrowBinaryHandler(self._cursor, self._connection)

        return self._connection.rest.fetch(
            u'get', url, headers,
            timeout=DEFAULT_REQUEST_TIMEOUT,
            is_raw_binary=True,
            binary_data_handler=handler)
def datasets_evaluate(dataset_file): threading=True k=1 kMax=10 p_entity=0 p_relation=0 global correctRelations correctRelations=0 global wrongRelations wrongRelations=0 global correctEntities correctEntities=0 global wrongEntities wrongEntities=0 count=1 startQ=0 endQ=5000 errors=0 results=[] p_e=0 p_r=0 #questions=read_dataset('datasets/simplequestions.txt') filepath = 'datasets/'+dataset_file questions=read_dataset(filepath) if threading: pool = ThreadPool(12) pool.map(evaluate, questions[:50]) pool.close() pool.join() else: for question in questions: try: single_result=evaluate(question) print(count) count=count+1 print( "#####" + str((correctRelations * 100) / (correctRelations + wrongRelations))) print("#####" + str((correctEntities * 100) / (correctEntities + wrongEntities))) results.append(single_result) except: errors+=1 print(errors) continue with open('results_simple_entities_FALCON.csv', mode='w', newline='', encoding='utf-8') as results_file: writer = csv.writer(results_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) writer.writerows(results) print("Correct Relations:",correctRelations) print("Relations:") print((correctRelations*100)/(correctRelations+wrongRelations)) print("Correct Entities:",correctEntities) print("Entities:") print((correctEntities*100)/(correctEntities+wrongEntities)) print(correctEntities+wrongEntities) ''''print("p_entity:")
class hxtool_scheduler:
    """In-memory task scheduler backed by a polling thread and a thread pool.

    Tasks live in ``task_queue`` (task_id -> task object).  A background
    thread scans the queue and submits every task whose ``should_run()`` is
    true to ``task_threads``.  Metadata of tasks moved out of the queue is
    kept in ``history_queue`` (task_id -> metadata dict), which is trimmed
    once it grows past ``MAX_HISTORY_QUEUE_LENGTH`` entries.
    """

    def __init__(self, thread_count=None):
        """Create the queues, the poll thread and the worker pool.

        :param thread_count: number of worker threads; defaults to
            ``cpu_count() + 1`` when falsy.
        """
        self._lock = threading.Lock()
        self.task_queue = {}
        self.history_queue = {}
        self.task_hx_api_sessions = {}
        self._poll_thread = threading.Thread(target=self._scan_task_queue,
                                             name="PollThread")
        self._stop_event = threading.Event()
        # Allow for thread oversubscription based on CPU count
        self.thread_count = thread_count or (cpu_count() + 1)
        self.task_threads = ThreadPool(self.thread_count)
        logger.info("Task scheduler initialized.")

    def _scan_task_queue(self):
        """Poll loop: dispatch runnable tasks until the stop event is set."""
        while not self._stop_event.wait(.1):
            ret = None
            # Only the snapshot/submission happens under the lock; results are
            # drained after it has been released.
            with self._lock:
                ret = self.task_threads.imap_unordered(
                    self._run_task,
                    [_ for _ in self.task_queue.values() if _.should_run()])
            if ret:
                while not self._stop_event.is_set():
                    try:
                        ret.next(timeout=5)
                    except TimeoutError:
                        break
                    except StopIteration:
                        break
                    except Exception as e:
                        logger.error(pretty_exceptions(e))
                        continue

    def _run_task(self, task):
        """Run a single task on a worker thread.

        :returns: whatever ``task.run(self)`` returns, or ``False`` when the
            task raised an ``Exception`` (the task is then marked failed).
        """
        ret = False
        task.set_state(TASK_STATE_QUEUED)
        logger.debug("Executing task with id: %s, name: %s.", task.task_id,
                     task.name)
        try:
            ret = task.run(self)
        except Exception as e:
            logger.error(pretty_exceptions(e))
            task.set_state(TASK_STATE_FAILED)
        # Deliberately not ``finally: return ret``: a ``return`` inside
        # ``finally`` would also discard KeyboardInterrupt/SystemExit.
        return ret

    def _add_task_api_task(self, profile_id, hx_host, hx_port, username,
                           password):
        """Create the HXAPI session object for a profile and queue its login task."""
        self.task_hx_api_sessions[profile_id] = HXAPI(
            hx_host,
            hx_port=hx_port,
            proxies=hxtool_global.hxtool_config['network'].get('proxies'),
            headers=hxtool_global.hxtool_config['headers'],
            cookies=hxtool_global.hxtool_config['cookies'],
            logger_name=hxtool_logging.getLoggerName(HXAPI.__name__),
            default_encoding=default_encoding)
        api_login_task = hxtool_scheduler_task(
            profile_id, "Task API Login - {}".format(hx_host), immutable=True)
        api_login_task.add_step(hxtool_task_modules.task_api_session_module,
                                kwargs={
                                    'profile_id': profile_id,
                                    'username': username,
                                    'password': password
                                })
        self.add(api_login_task)

    def start(self):
        """Start the polling thread."""
        self._poll_thread.start()
        logger.info("Task scheduler started with %s threads.",
                    self.thread_count)

    def stop(self):
        """Signal the poll loop to stop, then close and join the worker pool."""
        logger.debug("stop() enter.")
        self._stop_event.set()
        logger.debug("Closing the task thread pool.")
        self.task_threads.close()
        logger.debug("Waiting for running threads to terminate.")
        self.task_threads.join()
        logger.debug("stop() exit.")

    def initialize_task_api_sessions(self):
        """Queue a task API login for every profile with a stored background credential."""
        # Loop through background credentials and start the API sessions
        profiles = hxtool_global.hxtool_db.profileList()
        for profile in profiles:
            task_api_credential = hxtool_global.hxtool_db.backgroundProcessorCredentialGet(
                profile['profile_id'])
            if task_api_credential:
                try:
                    salt = HXAPI.b64(task_api_credential['salt'], True)
                    iv = HXAPI.b64(task_api_credential['iv'], True)
                    key = crypt_pbkdf2_hmacsha256(salt, TASK_API_KEY)
                    decrypted_background_password = crypt_aes(
                        key,
                        iv,
                        task_api_credential['hx_api_encrypted_password'],
                        decrypt=True)
                    self._add_task_api_task(
                        profile['profile_id'], profile['hx_host'],
                        profile['hx_port'],
                        task_api_credential['hx_api_username'],
                        decrypted_background_password)
                    # Drop the local reference to the clear-text password.
                    decrypted_background_password = None
                except UnicodeDecodeError:
                    logger.error(
                        "Please reset the background credential for {} ({}).".
                        format(profile['hx_host'], profile['profile_id']))
            else:
                logger.info("No background credential for {} ({}).".format(
                    profile['hx_host'], profile['profile_id']))

    def add_task_api_session(self, profile_id, hx_host, hx_port, username,
                             password):
        """Encrypt and store a background credential, then queue its login task."""
        iv = crypt_generate_random(16)
        salt = crypt_generate_random(32)
        key = crypt_pbkdf2_hmacsha256(salt, TASK_API_KEY)
        encrypted_password = crypt_aes(key, iv, password)
        hxtool_global.hxtool_db.backgroundProcessorCredentialCreate(
            profile_id, username, HXAPI.b64(iv), HXAPI.b64(salt),
            encrypted_password)
        encrypted_password = None
        self._add_task_api_task(profile_id, hx_host, hx_port, username,
                                password)
        password = None

    def remove_task_api_session(self, profile_id):
        """Delete the stored credential and log out the profile's session if it is valid."""
        hxtool_global.hxtool_db.backgroundProcessorCredentialRemove(profile_id)
        hx_api_object = self.task_hx_api_sessions.get(profile_id)
        if hx_api_object and hx_api_object.restIsSessionValid():
            hx_api_object.restLogout()
            del self.task_hx_api_sessions[profile_id]

    def logout_task_api_sessions(self):
        """Call ``restLogout()`` on every task API session object."""
        for hx_api_object in self.task_hx_api_sessions.values():
            if hx_api_object is not None:
                hx_api_object.restLogout()

    def signal_child_tasks(self, parent_task_id, parent_task_state,
                           parent_stored_result):
        """Forward a parent's state and stored result to every queued task."""
        with self._lock:
            for task in self.task_queue.values():
                task.parent_state_callback(parent_task_id, parent_task_state,
                                           parent_stored_result)

    def add(self, task, should_store=True):
        """Queue a task and mark it scheduled.

        :param should_store: when true, ``task.store()`` is called as well.
        :returns: the task id.
        """
        with self._lock:
            self.task_queue[task.task_id] = task
            task.set_state(TASK_STATE_SCHEDULED)
            # Note: this must be within the lock otherwise we run into a nasty
            # race condition where the task runs before the stored state is
            # set - with the run lock taking precedence.
            if should_store:
                task.store()
        return task.task_id

    def add_list(self, tasks):
        """Queue every task of ``tasks``; anything that is not a list is ignored."""
        if isinstance(tasks, list):
            for t in tasks:
                self.add(t)

    def remove(self, task_id, delete_children=True):
        """Remove a task from the queue, or its entry from the history.

        Immutable queued tasks are left in place.  With ``delete_children``
        the queued tasks and history entries whose parent is ``task_id`` are
        removed first.
        """
        if not task_id:
            return
        with self._lock:
            if delete_children:
                # We need to make a shallow copy so we don't modify the
                # task_queue while iterating over it
                for child_task_id in [
                        _.task_id for _ in self.task_queue.values()
                        if _.parent_id == task_id
                ]:
                    self.task_queue[child_task_id].remove()
                    del self.task_queue[child_task_id]
                for child_task_id in [
                        _['task_id'] for _ in self.history_queue.values()
                        if _['parent_id'] == task_id
                ]:
                    del self.history_queue[child_task_id]
            t = self.task_queue.get(task_id, None)
            if t and not t.immutable:
                t.remove()
                del self.task_queue[task_id]
            elif task_id in self.history_queue:
                del self.history_queue[task_id]

    def get(self, task_id):
        """Return the queued task with ``task_id``, or ``None``."""
        with self._lock:
            return self.task_queue.get(task_id, None)

    def move_to_history(self, task_id):
        """Move a queued task's metadata into the history, evicting the oldest entry when full."""
        with self._lock:
            t = self.task_queue.pop(task_id, None)
            if t is not None:
                self.history_queue[task_id] = t.metadata()
                if len(self.history_queue) > MAX_HISTORY_QUEUE_LENGTH:
                    # dict.popitem() is LIFO and would drop the entry that was
                    # just added; dicts keep insertion order, so the first key
                    # is the oldest one.
                    del self.history_queue[next(iter(self.history_queue))]

    def tasks(self):
        """Return the metadata of all queued tasks followed by the history entries."""
        # Shallow copy to avoid locking
        return [_.metadata() for _ in list(self.task_queue.values())] + list(
            self.history_queue.values())

    # Load queued tasks from the database
    def load_from_database(self):
        """Re-queue tasks stored in the database; requires a running scheduler.

        Entries whose parent is neither complete nor present in the database
        are deleted as orphans.  Any error is logged, not raised.
        """
        try:
            if self.status():
                tasks = hxtool_global.hxtool_db.taskList()
                for task_entry in tasks:
                    p_id = task_entry.get('parent_id', None)
                    if p_id and (not task_entry['parent_complete']
                                 and not hxtool_global.hxtool_db.taskGet(
                                     task_entry['profile_id'], p_id)):
                        logger.warning("Deleting orphan task {}, {}".format(
                            task_entry['name'], task_entry['task_id']))
                        hxtool_global.hxtool_db.taskDelete(
                            task_entry['profile_id'], task_entry['task_id'])
                    else:
                        task = hxtool_scheduler_task.deserialize(task_entry)
                        task.set_stored()
                        # Set should_store to False as we've already been
                        # stored, and we skip a needless update
                        self.add(task, should_store=False)
            else:
                logger.warning(
                    "Task scheduler must be running before loading queued tasks from the database."
                )
        except Exception as e:
            logger.error(
                "Failed to load saved tasks from the database. Error: {}".
                format(pretty_exceptions(e)))

    def status(self):
        """Return ``True`` while the polling thread is alive."""
        return self._poll_thread.is_alive()
def exec_tls(suites_file,
             target,
             tls_opts,
             serv_set,
             handshake=False,
             weight=False):
    """Run the server/client pair for every ciphersuite and report the outcome.

    The ciphersuites are parsed from ``suites_file``, the programs for
    ``target`` are built once, and then a server and a client are run
    concurrently for each suite.  A suite counts as "n/a" when both return 1,
    as an error when either returns non-zero otherwise, and as a success when
    both return 0.  Plots are generated for the successful suites unless
    ``weight`` compares equal to ``False``.

    Note that ``tls_opts['ciphersuite']`` is overwritten for every suite.
    """
    # ``weight`` may carry a non-boolean value, so keep the original
    # comparison against False instead of a plain truthiness test.
    wants_stats = weight != False  # noqa: E712
    stats_label = "Yes" if wants_stats else "No"
    pad = settings.strlen

    # Step 1: Parse service list
    print('--- STARTING CIPHERSUITE SELECTION PROCESS ---')
    print(f'\nParsing ciphersuites from {suites_file}'.ljust(pad, '.'),
          end=' ',
          flush=True)
    total_ciphersuites = utils.parse_services(suites_file)
    n_total = len(total_ciphersuites)
    print(f'ok\nGot {n_total} ciphersuites')

    print('\nRunning with options:')
    option_lines = [
        f' -Starting security level: {tls_opts["sec_lvl"]}',
        f' -Ending security level: {tls_opts["max_sec_lvl"]}',
        f' -Starting input size: {tls_opts["msg_size"]} bytes',
        f' -Ending input size: {tls_opts["max_msg_size"]} bytes',
        f' -Number of tests: {tls_opts["n_tests"]}',
        f' -Data\'s directory: {tls_opts["path"]}',
        f' -Generate statistics: {stats_label}',
    ]
    print('\n'.join(option_lines))

    print('\n--- STARTING DATA ACQUISITION PROCESS ---')

    # Step 2: Compile libs and programs
    print('\nPrepararing libraries and programs'.ljust(pad, '.'),
          end=' ',
          flush=True)
    pool = ThreadPool(processes=2)
    if pool.apply_async(utils.make_progs, (target, )).get() != 0:
        sys.exit(2)

    success_ciphersuites = []
    not_ciphersuites = []
    error_ciphersuites = []

    for position, suite in enumerate(total_ciphersuites, start=1):
        print(f'\nStarting analysis for: {suite} ({position}/{n_total})')
        tls_opts['ciphersuite'] = suite

        # Step 3: Start server in thread 1
        print(' Starting server'.ljust(pad, '.'), end=' ', flush=True)
        server_job = pool.apply_async(run_srv, (target, tls_opts))
        print('ok')

        # Step 4: Start client in thread 2
        print(' Starting client'.ljust(pad, '.'), end=' ', flush=True)
        client_job = pool.apply_async(run_cli, (target, tls_opts))
        print('ok')

        # Step 5: Verify result from server and client
        srv_ret = server_job.get()
        cli_ret = client_job.get()

        if srv_ret == 1 and cli_ret == 1:
            not_ciphersuites.append(suite)
            continue
        if srv_ret != 0 or cli_ret != 0:
            error_ciphersuites.append(suite)
            continue
        print('\n Data successfully obtained!!!')
        success_ciphersuites.append(suite)

    pool.close()
    pool.join()

    n_success = len(success_ciphersuites)
    n_not = len(not_ciphersuites)
    n_error = len(error_ciphersuites)

    if wants_stats:
        # Step 6: Analyse data and create comparison plots for all
        # ciphersuites that ended successfully
        print('\n--- STARTING DATA PLOTS GENERATION PROCESS ---')
        make_figs(tls_opts['path'],
                  suites_file,
                  success_ciphersuites,
                  weight,
                  handshake=handshake,
                  serv_set=serv_set)

    # Step 7: For each target, save successful ciphersuites in a file
    # utils.write_ciphersuites('services', success_ciphersuites)

    # Step 8: Report final status
    print('\n--- FINAL STATUS ---')
    print('\nData generation:')
    print(f' -Number of ciphersuites: {n_total}')
    print(f' -Number of successes: {n_success}')
    print(f' -Number of n/a: {n_not}')
    print(f' -Number of errors: {n_error}')

    for heading, failed in ((' -N/A ciphersuites:', not_ciphersuites),
                            (' -Error ciphersuites:', error_ciphersuites)):
        if failed:
            print(heading)
            for suite in failed:
                print(f' {suite}')

    if wants_stats:
        print('\nPlots generation:')
        print(f' -Number of ciphersuites: {n_success}')

    print('\nData aquisition and analysis has ended.')
    print(
        f'You can check all the csv data in the docs/{tls_opts["path"]} directory',
        end='')
    if wants_stats:
        print(
            f' and the generated plots and statistics in the tools/statistics/{tls_opts["path"]} '
            + f'and tools/results/{tls_opts["path"]} directories, respectively',
            end='')
    print('.')
class CmdUpload(object):
    """ This class is responsible for uploading packages to remotes. The flow is:
        - Collect all the packages to be uploaded with the UploadCollecter
        - Execute the upload. For every ref:
            - Upload the recipe of the ref: "_upload_recipe"
                - If not FORCE, check the date "_check_recipe_date", i.e. if there are
                  changes, do not allow uploading if the remote date is newer than the
                  local cache one
                - Retrieve the sources (exports_sources), if they are not cached, and
                  uploading to a different remote. "complete_recipe_sources"
                - Gather files and create 2 .tgz (exports, exports_sources) with
                  "_compress_recipe_files"
                - Decide which files have to be uploaded and deleted from the server
                  based on the different with the remote snapshot "_recipe_files_to_upload"
                  This can raise if upload policy is not overwrite
                - Execute the real transfer "remote_manager.upload_recipe()"
            - For every package_id of every ref: "_upload_package"
                - Gather files and create package.tgz. "_compress_package_files"
                - (Optional) Do the integrity check of the package
                - Decide which files to upload and delete from server:
                  "_package_files_to_upload". Can raise if policy is NOT overwrite
                - Do the actual upload

        All the REVISIONS are local defined, not retrieved from servers

        This requires calling to the remote API methods:
        - get_recipe_sources() to get the export_sources if they are missing
        - get_recipe_snapshot() to do the diff and know what files to upload
        - get_package_snapshot() to do the diff and know what files to upload
        - get_recipe_manifest() to check the date and raise if policy requires
        - get_package_manifest() to raise if policy!=force and manifests change
    """

    def __init__(self, cache, user_io, remote_manager, loader, hook_manager):
        self._cache = cache
        self._user_io = user_io
        self._output = progress_bar.ProgressOutput(self._user_io.out)
        self._remote_manager = remote_manager
        self._loader = loader
        self._hook_manager = hook_manager
        # Created per upload() call; shared by recipe and package uploads
        self._upload_thread_pool = None
        # (exception, ref or pref, traceback text, remote) per failed upload
        self._exceptions_list = []

    def upload(self, reference_or_pattern, remotes, upload_recorder, package_id=None,
               all_packages=None, confirm=False, retry=None, retry_wait=None,
               integrity_check=False, policy=None, query=None, parallel_upload=False):
        """ Collects the references matching the pattern and uploads them, remote by
        remote, through a thread pool (cpu_count() workers if parallel_upload, else 1).

        Raises ConanException after reporting every collected failure, if any upload
        failed.
        """
        t1 = time.time()
        # Start clean, so failures of a previous call are not reported again
        self._exceptions_list = []

        collecter = _UploadCollecter(self._cache, self._user_io, self._output, self._loader)
        refs_by_remote = collecter.collect(package_id, reference_or_pattern, confirm, remotes,
                                           all_packages, query)

        if parallel_upload:
            self._user_io.disable_input()
        self._upload_thread_pool = ThreadPool(cpu_count() if parallel_upload else 1)

        try:
            for remote, refs in refs_by_remote.items():
                self._output.info("Uploading to remote '{}':".format(remote.name))

                # The closure reads the loop variable "remote"; that is safe because
                # map() blocks until every recipe of this remote has been processed
                def upload_ref(ref_conanfile_prefs):
                    _ref, _conanfile, _prefs = ref_conanfile_prefs
                    try:
                        self._upload_ref(_conanfile, _ref, _prefs, retry, retry_wait,
                                         integrity_check, policy, remote, upload_recorder,
                                         remotes)
                    except BaseException as base_exception:
                        base_trace = traceback.format_exc()
                        self._exceptions_list.append((base_exception, _ref, base_trace,
                                                      remote))

                self._upload_thread_pool.map(upload_ref, list(refs))
        finally:
            # Always wait for the queued package uploads and release the pool,
            # even if something above raised
            self._upload_thread_pool.close()
            self._upload_thread_pool.join()

        if self._exceptions_list:
            for exc, ref, trace, failed_remote in self._exceptions_list:
                t = "recipe" if isinstance(ref, ConanFileReference) else "package"
                msg = "%s: Upload %s to '%s' failed: %s\n" % (str(ref), t, failed_remote.name,
                                                              str(exc))
                if get_env("CONAN_VERBOSE_TRACEBACK", False):
                    msg += trace
                self._output.error(msg)
            raise ConanException("Errors uploading some packages")

        logger.debug("UPLOAD: Time manager upload: %f" % (time.time() - t1))

    def _upload_ref(self, conanfile, ref, prefs, retry, retry_wait, integrity_check, policy,
                    recipe_remote, upload_recorder, remotes):
        """ Uploads the recipes and binaries identified by ref

        The recipe is uploaded synchronously; the packages are queued in the thread
        pool with map_async, so this returns before they are uploaded. The
        "post_upload" hook runs once all the packages succeeded (or right away if
        there are no packages).
        """
        assert (ref.revision is not None), "Cannot upload a recipe without RREV"
        conanfile_path = self._cache.package_layout(ref).conanfile()
        # FIXME: I think it makes no sense to specify a remote to "pre_upload"
        # FIXME: because the recipe can have one and the package a different one
        self._hook_manager.execute("pre_upload", conanfile_path=conanfile_path,
                                   reference=ref, remote=recipe_remote)
        msg = "\rUploading %s to remote '%s'" % (str(ref), recipe_remote.name)
        self._output.info(left_justify_message(msg))
        self._upload_recipe(ref, conanfile, retry, retry_wait, policy, recipe_remote, remotes)
        upload_recorder.add_recipe(ref, recipe_remote.name, recipe_remote.url)

        # Now the binaries
        if prefs:
            total = len(prefs)
            p_remote = recipe_remote

            def upload_package_index(index_pref):
                # Returns None on success, or the failure tuple to be reported
                index, pref = index_pref
                try:
                    up_msg = "\rUploading package %d/%d: %s to '%s'" % (index + 1, total,
                                                                         str(pref.id),
                                                                         p_remote.name)
                    self._output.info(left_justify_message(up_msg))
                    self._upload_package(pref, retry, retry_wait, integrity_check, policy,
                                         p_remote)
                    upload_recorder.add_package(pref, p_remote.name, p_remote.url)
                except BaseException as pkg_exc:
                    trace = traceback.format_exc()
                    return pkg_exc, pref, trace, p_remote

            def upload_package_callback(ret):
                package_exceptions = [r for r in ret if r is not None]
                self._exceptions_list.extend(package_exceptions)
                if not package_exceptions:
                    # FIXME: I think it makes no sense to specify a remote to "post_upload"
                    # FIXME: because the recipe can have one and the package a different one
                    self._hook_manager.execute("post_upload", conanfile_path=conanfile_path,
                                               reference=ref, remote=recipe_remote)

            # This doesn't wait for the packages to end, so the function returns
            # and the "pool entry" for the recipe is released
            self._upload_thread_pool.map_async(upload_package_index,
                                               list(enumerate(prefs)),
                                               callback=upload_package_callback)
        else:
            # FIXME: I think it makes no sense to specify a remote to "post_upload"
            # FIXME: because the recipe can have one and the package a different one
            self._hook_manager.execute("post_upload", conanfile_path=conanfile_path,
                                       reference=ref, remote=recipe_remote)

    def _upload_recipe(self, ref, conanfile, retry, retry_wait, policy, remote, remotes):
        """ Compresses the recipe files, validates them against the remote according to
        the policy and uploads what changed. Returns the ref.
        """
        current_remote_name = self._cache.package_layout(ref).load_metadata().recipe.remote

        if remote.name != current_remote_name:
            complete_recipe_sources(self._remote_manager, self._cache, conanfile, ref, remotes)

        conanfile_path = self._cache.package_layout(ref).conanfile()
        self._hook_manager.execute("pre_upload_recipe", conanfile_path=conanfile_path,
                                   reference=ref, remote=remote)

        t1 = time.time()
        cache_files = self._compress_recipe_files(ref)

        with self._cache.package_layout(ref).update_metadata() as metadata:
            metadata.recipe.checksums = calc_files_checksum(cache_files)

        local_manifest = FileTreeManifest.loads(load(cache_files["conanmanifest.txt"]))

        remote_manifest = None
        if policy != UPLOAD_POLICY_FORCE:
            # Check SCM data for auto fields
            if hasattr(conanfile, "scm") and (
                    conanfile.scm.get("url") == "auto" or
                    conanfile.scm.get("revision") == "auto" or
                    conanfile.scm.get("type") is None or
                    conanfile.scm.get("url") is None or
                    conanfile.scm.get("revision") is None):
                raise ConanException("The recipe contains invalid data in the 'scm' attribute"
                                     " (some 'auto' values or missing fields 'type', 'url' or"
                                     " 'revision'). Use '--force' to ignore this error or export"
                                     " again the recipe ('conan export' or 'conan create') to"
                                     " fix these issues.")

            remote_manifest = self._check_recipe_date(ref, remote, local_manifest)

        if policy == UPLOAD_POLICY_SKIP:
            return ref

        files_to_upload, deleted = self._recipe_files_to_upload(ref, policy, cache_files, remote,
                                                                remote_manifest, local_manifest)

        if files_to_upload or deleted:
            self._remote_manager.upload_recipe(ref, files_to_upload, deleted, remote, retry,
                                               retry_wait)
            self._upload_recipe_end_msg(ref, remote)
        else:
            self._output.info("Recipe is up to date, upload skipped")
        duration = time.time() - t1
        log_recipe_upload(ref, duration, cache_files, remote.name)
        self._hook_manager.execute("post_upload_recipe", conanfile_path=conanfile_path,
                                   reference=ref, remote=remote)

        # The recipe wasn't in the registry or it has changed the revision field only
        if not current_remote_name:
            with self._cache.package_layout(ref).update_metadata() as metadata:
                metadata.recipe.remote = remote.name

        return ref

    def _upload_package(self, pref, retry=None, retry_wait=None, integrity_check=False,
                        policy=None, p_remote=None):
        """ Compresses the package files and uploads what changed with respect to the
        remote. Returns the pref, or None when the policy is "skip".
        """
        assert (pref.revision is not None), "Cannot upload a package without PREV"
        assert (pref.ref.revision is not None), "Cannot upload a package without RREV"

        pkg_layout = self._cache.package_layout(pref.ref)
        conanfile_path = pkg_layout.conanfile()
        self._hook_manager.execute("pre_upload_package", conanfile_path=conanfile_path,
                                   reference=pref.ref,
                                   package_id=pref.id,
                                   remote=p_remote)

        t1 = time.time()
        the_files = self._compress_package_files(pref, integrity_check)

        if policy == UPLOAD_POLICY_SKIP:
            return None
        files_to_upload, deleted = self._package_files_to_upload(pref, policy, the_files,
                                                                 p_remote)

        if files_to_upload or deleted:
            self._remote_manager.upload_package(pref, files_to_upload, deleted, p_remote, retry,
                                                retry_wait)
            logger.debug("UPLOAD: Time upload package: %f" % (time.time() - t1))
        else:
            self._output.info("Package is up to date, upload skipped")

        duration = time.time() - t1
        log_package_upload(pref, duration, the_files, p_remote)
        self._hook_manager.execute("post_upload_package", conanfile_path=conanfile_path,
                                   reference=pref.ref, package_id=pref.id, remote=p_remote)

        logger.debug("UPLOAD: Time uploader upload_package: %f" % (time.time() - t1))

        # Update the package metadata
        checksums = calc_files_checksum(the_files)
        with pkg_layout.update_metadata() as metadata:
            cur_package_remote = metadata.packages[pref.id].remote
            if not cur_package_remote:
                metadata.packages[pref.id].remote = p_remote.name
            metadata.packages[pref.id].checksums = checksums

        return pref

    def _compress_recipe_files(self, ref):
        """ Removes dirty .tgz leftovers from the export folder and compresses the
        exports and exports_sources. Raises if the conanfile or manifest is missing.
        """
        export_folder = self._cache.package_layout(ref).export()

        for f in (EXPORT_TGZ_NAME, EXPORT_SOURCES_TGZ_NAME):
            tgz_path = os.path.join(export_folder, f)
            if is_dirty(tgz_path):
                self._output.warn("%s: Removing %s, marked as dirty" % (str(ref), f))
                os.remove(tgz_path)
                clean_dirty(tgz_path)

        files, symlinks = gather_files(export_folder)
        if CONANFILE not in files or CONAN_MANIFEST not in files:
            raise ConanException("Cannot upload corrupted recipe '%s'" % str(ref))
        export_src_folder = self._cache.package_layout(ref).export_sources()
        src_files, src_symlinks = gather_files(export_src_folder)
        the_files = _compress_recipe_files(files, symlinks, src_files, src_symlinks,
                                           export_folder, self._output)
        return the_files

    def _compress_package_files(self, pref, integrity_check):
        """ Validates the package folder (not dirty, info and manifest present,
        optionally the checksums) and compresses it.
        """
        t1 = time.time()
        # existing package, will use short paths if defined
        package_folder = self._cache.package_layout(pref.ref, short_paths=None).package(pref)

        if is_dirty(package_folder):
            raise ConanException("Package %s is corrupted, aborting upload.\n"
                                 "Remove it with 'conan remove %s -p=%s'"
                                 % (pref, pref.ref, pref.id))
        tgz_path = os.path.join(package_folder, PACKAGE_TGZ_NAME)
        if is_dirty(tgz_path):
            self._output.warn("%s: Removing %s, marked as dirty"
                              % (str(pref), PACKAGE_TGZ_NAME))
            os.remove(tgz_path)
            clean_dirty(tgz_path)
        # Get all the files in that directory
        files, symlinks = gather_files(package_folder)

        if CONANINFO not in files or CONAN_MANIFEST not in files:
            logger.error("Missing info or manifest in uploading files: %s" % (str(files)))
            raise ConanException("Cannot upload corrupted package '%s'" % str(pref))

        logger.debug("UPLOAD: Time remote_manager build_files_set : %f" % (time.time() - t1))
        if integrity_check:
            self._package_integrity_check(pref, files, package_folder)
            logger.debug("UPLOAD: Time remote_manager check package integrity : %f"
                         % (time.time() - t1))

        the_files = _compress_package_files(files, symlinks, package_folder, self._output)
        return the_files

    def _recipe_files_to_upload(self, ref, policy, files, remote, remote_manifest,
                                local_manifest):
        """ Returns (files to upload, file names to delete in the remote), or
        (None, None) if the remote recipe is identical. Raises if the recipes differ
        and the policy forbids overwriting.
        """
        self._remote_manager.check_credentials(remote)
        remote_snapshot = self._remote_manager.get_recipe_snapshot(ref, remote)

        if not remote_snapshot:
            return files, set()

        deleted = set(remote_snapshot).difference(files)
        if policy != UPLOAD_POLICY_FORCE:
            if remote_manifest is None:
                # This is the weird scenario, we have a snapshot but don't have a manifest.
                # Can be due to concurrency issues, so we can try retrieve it now
                try:
                    remote_manifest, _ = self._remote_manager.get_recipe_manifest(ref, remote)
                except NotFoundException:
                    # This is weird, the manifest still not there, better upload everything
                    self._output.warn("The remote recipe doesn't have the 'conanmanifest.txt' "
                                      "file and will be uploaded: '{}'".format(ref))
                    return files, deleted

            if remote_manifest == local_manifest:
                return None, None

            if policy in (UPLOAD_POLICY_NO_OVERWRITE, UPLOAD_POLICY_NO_OVERWRITE_RECIPE):
                raise ConanException("Local recipe is different from the remote recipe. "
                                     "Forbidden overwrite.")

        return files, deleted

    def _package_files_to_upload(self, pref, policy, the_files, remote):
        """ Returns (files to upload, file names to delete in the remote), or
        (None, None) if the remote package is identical. Raises if the packages
        differ and the policy forbids overwriting.
        """
        self._remote_manager.check_credentials(remote)
        remote_snapshot = self._remote_manager.get_package_snapshot(pref, remote)

        if remote_snapshot and policy != UPLOAD_POLICY_FORCE:
            if not is_package_snapshot_complete(remote_snapshot):
                return the_files, set()
            remote_manifest, _ = self._remote_manager.get_package_manifest(pref, remote)
            local_manifest = FileTreeManifest.loads(load(the_files["conanmanifest.txt"]))
            if remote_manifest == local_manifest:
                return None, None
            if policy == UPLOAD_POLICY_NO_OVERWRITE:
                raise ConanException("Local package is different from the remote package. Forbidden"
                                     " overwrite.")
        # A missing snapshot (None) means there is nothing to delete in the remote
        deleted = set(remote_snapshot or ()).difference(the_files)
        return the_files, deleted

    def _upload_recipe_end_msg(self, ref, remote):
        """ Prints the "uploaded" message with the (bintray web) URL of the remote """
        msg = "\rUploaded conan recipe '%s' to '%s'" % (str(ref), remote.name)
        url = remote.url.replace("https://api.bintray.com/conan", "https://bintray.com")
        msg += ": %s" % url
        self._output.info(left_justify_message(msg))

    def _package_integrity_check(self, pref, files, package_folder):
        """ Compares the stored package manifest with the one computed from the files.
        On mismatch the package .tgz is removed (best effort) and ConanException raised.
        """
        # If package has been modified remove tgz to regenerate it
        self._output.rewrite_line("Checking package integrity...")

        # short_paths = None is enough if there exist short_paths
        layout = self._cache.package_layout(pref.ref, short_paths=None)
        read_manifest, expected_manifest = layout.package_manifests(pref)

        if read_manifest != expected_manifest:
            self._output.writeln("")
            diff = read_manifest.difference(expected_manifest)
            # Format every mismatch once; used for both the warnings and the log
            mismatches = ["Mismatched checksum '%s' (manifest: %s, file: %s)" % (fname, h1, h2)
                          for fname, (h1, h2) in diff.items()]
            for mismatch in mismatches:
                self._output.warn(mismatch)

            if PACKAGE_TGZ_NAME in files:
                tgz_path = os.path.join(package_folder, PACKAGE_TGZ_NAME)
                try:
                    os.unlink(tgz_path)
                except OSError:
                    # Best effort: the upload is aborted below anyway
                    pass
            error_msg = os.linesep.join(mismatches)
            logger.error("Manifests doesn't match!\n%s" % error_msg)
            raise ConanException("Cannot upload corrupted package '%s'" % str(pref))
        else:
            self._output.rewrite_line("Package integrity OK!")
        self._output.writeln("")

    def _check_recipe_date(self, ref, remote, local_manifest):
        """ Returns the remote recipe manifest, or None if the recipe is not in the
        remote yet. Raises if the remote recipe differs and is newer than the local one.
        """
        try:
            remote_recipe_manifest, ref = self._remote_manager.get_recipe_manifest(ref, remote)
        except NotFoundException:
            return None  # First time uploading this package

        if (remote_recipe_manifest != local_manifest and
                remote_recipe_manifest.time > local_manifest.time):
            self._print_manifest_information(remote_recipe_manifest, local_manifest, ref, remote)
            raise ConanException("Remote recipe is newer than local recipe: "
                                 "\n Remote date: %s\n Local date: %s" %
                                 (remote_recipe_manifest.time, local_manifest.time))

        return remote_recipe_manifest

    def _print_manifest_information(self, remote_recipe_manifest, local_manifest, ref, remote):
        """ Prints both manifests and, if conanfile.py differs, the line endings used by
        the local and remote files. Any error while printing is reported, not raised.
        """
        try:
            self._output.info("\n%s" % ("-" * 40))
            self._output.info("Remote manifest:")
            self._output.info(remote_recipe_manifest)
            self._output.info("Local manifest:")
            self._output.info(local_manifest)
            difference = remote_recipe_manifest.difference(local_manifest)
            if "conanfile.py" in difference:
                contents = load(self._cache.package_layout(ref).conanfile())
                endlines = "\\r\\n" if "\r\n" in contents else "\\n"
                self._output.info("Local 'conanfile.py' using '%s' line-ends" % endlines)
                remote_contents = self._remote_manager.get_recipe_path(ref, path="conanfile.py",
                                                                       remote=remote)
                endlines = "\\r\\n" if "\r\n" in remote_contents else "\\n"
                self._output.info("Remote 'conanfile.py' using '%s' line-ends" % endlines)
            self._output.info("\n%s" % ("-" * 40))
        except Exception as e:
            self._output.info("Error printing information about the diff: %s" % str(e))
class Scheduler(MooseObject):
    """
    Base class for handling jobs asynchronously.

    To use this class, call .schedule() and supply a list of testers to schedule.
    Each group of testers supplied will begin running immediately.

    Syntax:
       .schedule([list of tester objects])

    A list of testers will be added to a queue and begin calling their derived run
    method. You can continue to add more testers to the queue in this fashion.

    Once all jobs have been scheduled, call .waitFinish() to wait until all jobs
    have finished.

    Two thread pools are used: ``run_pool`` executes jobs through the derived
    ``run`` method, and the single-threaded ``status_pool`` reports job statuses
    to the harness one at a time.
    """

    @staticmethod
    def validParams():
        """ Return the parameter set understood by this scheduler """
        params = MooseObject.validParams()
        params.addRequiredParam('average_load', 64.0, "Average load to allow")
        params.addRequiredParam('max_processes', None, "Hard limit of maxium processes to use")
        params.addParam('min_reported_time', 10, "The minimum time elapsed before a job is reported as taking to long to run.")
        return params

    # This is what will be checked for when we look for valid schedulers
    IS_SCHEDULER = True

    def __init__(self, harness, params):
        """
        Store the harness and its options, size the thread pools and create the
        locks and counters used to track submitted jobs.
        """
        MooseObject.__init__(self, harness, params)

        ## The test harness to run callbacks on
        self.harness = harness

        # Retrieve and store the TestHarness options for use in this object
        self.options = harness.getOptions()

        # The Scheduler class can be initialized with no "max_processes" argument and it'll default
        # to a soft limit. If however a max_processes is passed we'll treat it as a hard limit.
        # The difference is whether or not we allow single jobs to exceed the number of slots.
        if params['max_processes'] is None:
            self.available_slots = 1
            self.soft_limit = True
        else:
            self.available_slots = params['max_processes']  # hard limit
            self.soft_limit = False

        self.average_load = params['average_load']

        self.min_report_time = params['min_reported_time']

        # Initialize run_pool based on available slots
        self.run_pool = ThreadPool(processes=self.available_slots)

        # Initialize status_pool to only use 1 process (to prevent status messages from getting clobbered)
        self.status_pool = ThreadPool(processes=1)

        # Slot lock when processing resource allocations and modifying slots_in_use
        self.slot_lock = threading.Lock()

        # Job lock when modifying a jobs status
        self.activity_lock = threading.Lock()

        # Job count lock when modifying incoming/outgoing jobs
        self.job_count_lock = threading.Lock()

        # A combination of processors + threads (-j/-n) currently in use, that a job requires
        self.slots_in_use = 0

        # Count of jobs which need to complete
        self.job_count = 0

        # Set containing all submitted jobs
        self.__job_bank = set()

        # Total running Job and Test failures encountered
        self.__failures = 0

        # Allow threads to set a global exception
        self.__error_state = False

        # Private set of jobs currently running
        self.__active_jobs = set()

        # Jobs that are taking longer to finish than the alloted time are reported back early to inform
        # the user 'stuff' is still running. Jobs entering this set will not be reported again.
        self.jobs_reported = set()

        # The last time the scheduler reported something
        self.last_reported_time = clock()

        # Sets of threading objects created by jobs entering and exiting the queues. When scheduler.waitFinish()
        # is called, and both thread pools are empty, the pools shut down, and the call to waitFinish() returns.
        self.__status_pool_lock = threading.Lock()
        self.__runner_pool_lock = threading.Lock()
        self.__status_pool_jobs = set()
        self.__runner_pool_jobs = set()

        # True when scheduler.waitFinish() is called. This alerts the scheduler, no more jobs are
        # to be scheduled. KeyboardInterrupts are then handled by the thread pools.
        self.__waiting = False

    def triggerErrorState(self):
        """ Flag the scheduler as failed and close both pools to new work """
        self.__error_state = True
        self.run_pool.close()
        self.status_pool.close()

    def killRemaining(self, keyboard=False):
        """
        Method to kill running jobs.

        Every job in the active set is asked to kill its process, then the
        scheduler enters its error state. When `keyboard` is True the harness
        is additionally notified through keyboard_interrupt().
        """
        with self.activity_lock:
            for job in self.__active_jobs:
                job.killProcess()

        self.triggerErrorState()
        if keyboard:
            self.harness.keyboard_interrupt()

    def retrieveJobs(self):
        """ return all the jobs the scheduler was tasked to perform work for """
        return self.__job_bank

    def schedulerError(self):
        """ boolean if the scheduler prematurely exited """
        return self.__error_state and not self.maxFailures()

    def maxFailures(self):
        """ Boolean for hitting max failures """
        return ((self.options.valgrind_mode and self.__failures >= self.options.valgrind_max_fails)
                or self.__failures >= self.options.max_fails)

    def run(self, job):
        """ Call derived run method """
        return

    def notifyFinishedSchedulers(self):
        """ Notify derived schedulers we are finished """
        return

    def augmentJobs(self, Jobs):
        """
        Allow derived schedulers to augment Jobs before they perform work.
        Note: This occurs before we perform a job count sanity check. So
        any additions or subtractions to the number of jobs will result in
        an exception.
        """
        return

    def waitFinish(self):
        """
        Inform the Scheduler there are no further jobs to schedule.
        Return once all jobs have completed.

        Raises SchedulerError when the wait loop ends without an error state
        while job_count is still non-zero.
        """
        self.__waiting = True
        try:
            # wait until there is an error, or if all the queues are empty
            waiting_on_status_pool = True
            waiting_on_runner_pool = True

            while (waiting_on_status_pool or waiting_on_runner_pool) and self.job_count:
                if self.__error_state:
                    break

                with self.__status_pool_lock:
                    waiting_on_status_pool = any(not x.ready() for x in self.__status_pool_jobs)
                with self.__runner_pool_lock:
                    waiting_on_runner_pool = any(not x.ready() for x in self.__runner_pool_jobs)

                sleep(0.1)

            # Reporting sanity check
            if not self.__error_state and self.job_count:
                raise SchedulerError('Scheduler exiting with different amount of work than what was tasked!')

            # In the error state the pools were already closed by triggerErrorState()
            if not self.__error_state:
                self.run_pool.close()
                self.run_pool.join()
                self.status_pool.close()
                self.status_pool.join()

            # allow derived schedulers to perform any exit routines
            self.notifyFinishedSchedulers()

        except KeyboardInterrupt:
            self.killRemaining(keyboard=True)

    def schedule(self, testers):
        """
        Generate and submit a group of testers to a thread pool queue for execution.

        Raises SchedulerError when the number of jobs created differs from the
        number of testers received.
        """
        # If we are not to schedule any more jobs for some reason, return now
        if self.__error_state:
            return

        # Instance our job DAG, create jobs, and a private lock for this group of jobs (testers)
        Jobs = JobDAG(self.options)
        j_dag = Jobs.createJobs(testers)
        j_lock = threading.Lock()

        # Allow derived schedulers access to the jobs before they launch
        self.augmentJobs(Jobs)

        # job-count to tester-count sanity check
        if j_dag.size() != len(testers):
            raise SchedulerError('Scheduler was going to run a different amount of testers than what was received (something bad happened)!')

        # Final reporting job-count sanity check
        with self.job_count_lock:
            self.job_count += j_dag.size()

        # Store all processed jobs in the global job bank
        self.__job_bank.update(j_dag.topological_sort())

        # Launch these jobs to perform work
        self.queueJobs(Jobs, j_lock)

    def queueJobs(self, Jobs, j_lock):
        """
        Determine which queue jobs should enter. Finished jobs are placed in the status
        pool to be printed while all others are placed in the runner pool to perform work.

        A finished job will trigger a change to the Job DAG, which will allow additional
        jobs to become available and ready to enter the runner pool (dependency jobs).
        """
        # NOTE(review): the `not pool._state` checks rely on the private Pool._state
        # attribute being falsy while the pool accepts work -- confirm this holds on
        # the interpreter version in use.
        with j_lock:
            concurrent_jobs = Jobs.getJobsAndAdvance()
            for job in concurrent_jobs:
                if job.isFinished():
                    if not self.status_pool._state:
                        with self.__status_pool_lock:
                            self.__status_pool_jobs.add(self.status_pool.apply_async(self.jobStatus, (job, Jobs, j_lock)))

                elif job.isHold():
                    if not self.run_pool._state:
                        with self.__runner_pool_lock:
                            job.setStatus(job.queued)
                            self.__runner_pool_jobs.add(self.run_pool.apply_async(self.runJob, (job, Jobs, j_lock)))

    def getLoad(self):
        """ Method to return current load average """
        loadAverage = 0.0
        try:
            loadAverage = os.getloadavg()[0]
        except AttributeError:
            pass      # getloadavg() not available in this implementation of os
        return loadAverage

    def satisfyLoad(self):
        """ Method for controlling load average """
        while self.slots_in_use > 1 and self.getLoad() >= self.average_load:
            sleep(1.0)

    def reserveSlots(self, job, j_lock):
        """
        Method which allocates resources to perform the job. Returns bool if job
        should be allowed to run based on available resources.

        An oversized job is allowed through under the soft limit (with an
        'OVERSIZED' caveat) and is marked skipped under the hard limit.
        """
        # comply with load average
        if self.options.load:
            self.satisfyLoad()

        with self.slot_lock:
            can_run = False
            if self.slots_in_use + job.getSlots() <= self.available_slots:
                can_run = True

            # Check for insufficient slots -soft limit
            elif job.getSlots() > self.available_slots and self.soft_limit:
                job.addCaveats('OVERSIZED')
                can_run = True

            # Check for insufficient slots -hard limit (skip this job)
            elif job.getSlots() > self.available_slots and not self.soft_limit:
                job.addCaveats('insufficient slots')
                with j_lock:
                    job.setStatus(job.skip)

            if can_run:
                self.slots_in_use += job.getSlots()
        return can_run

    def handleTimeoutJob(self, job, j_lock):
        """ Handle jobs that have timed out """
        with j_lock:
            if job.isRunning():
                job.setStatus(job.crash, 'TIMEOUT')
                job.killProcess()

    def handleLongRunningJob(self, job, Jobs, j_lock):
        """
        Handle jobs that have not reported in the alotted time.

        This runs on a threading.Timer thread, which can fire after the pools
        were closed by triggerErrorState(). Submitting to a closed pool raises,
        so the same closed-pool check used by queueJobs() is applied here.
        """
        with self.__status_pool_lock:
            if not self.status_pool._state:
                self.__status_pool_jobs.add(self.status_pool.apply_async(self.jobStatus, (job, Jobs, j_lock)))

    def jobStatus(self, job, Jobs, j_lock):
        """
        Instruct the TestHarness to print the status of job. This is a serial
        threaded operation, so as to prevent clobbering of text being printed
        to stdout.
        """
        # Its possible, the queue is just trying to empty
        if self.status_pool._state:
            return

        try:
            job_was_running = False
            # Check if we should print due to inactivity
            with j_lock:
                if job.isRunning():
                    if job in self.jobs_reported:
                        return

                    # report inactivity if last reported time falls within tolerances
                    elif clock() - self.last_reported_time >= self.min_report_time:
                        job_was_running = True
                        job.addCaveats('FINISHED')
                        with self.activity_lock:
                            self.jobs_reported.add(job)

                    # TestHarness has not yet been inactive long enough to warrant a report
                    else:
                        # adjust the next report time based on delta of last report time
                        adjusted_interval = max(1, self.min_report_time - max(1, clock() - self.last_reported_time))
                        job.report_timer = threading.Timer(adjusted_interval,
                                                           self.handleLongRunningJob,
                                                           (job, Jobs, j_lock,))
                        job.report_timer.start()
                        return

            # Immediately following the Job lock, print the status
            self.harness.handleJobStatus(job)

            # Do last, to prevent premature thread pool closures
            with j_lock:
                tester = job.getTester()
                if not tester.isSilent():
                    self.last_reported_time = clock()

                # A job reported only because it is long-running is still in flight,
                # so it must not be counted as completed yet
                if job.isFinished() and not job_was_running:
                    if tester.isFail():
                        self.__failures += 1

                    if self.maxFailures():
                        self.killRemaining()
                    else:
                        with self.job_count_lock:
                            self.job_count -= 1

        except Exception:
            print('statusWorker Exception: %s' % (traceback.format_exc()))
            self.killRemaining()

        except KeyboardInterrupt:
            self.killRemaining(keyboard=True)

    def runJob(self, job, Jobs, j_lock):
        """
        Method the run_pool calls when an available thread becomes ready.

        Reserves slots, hands the job to the derived run() method guarded by a
        timeout timer and a long-running report timer, releases the slots and
        re-enters queueJobs() so dependent jobs can start.
        """
        # Its possible, the queue is just trying to empty. Allow it to do so
        # with out generating overhead
        if self.__error_state:
            return

        try:
            # see if we have enough slots to start this job
            if self.reserveSlots(job, j_lock):
                with j_lock:
                    job.setStatus(job.running)

                with self.activity_lock:
                    self.__active_jobs.add(job)

                tester = job.getTester()
                timeout_timer = threading.Timer(float(tester.getMaxTime()),
                                                self.handleTimeoutJob,
                                                (job, j_lock,))

                job.report_timer = threading.Timer(self.min_report_time,
                                                   self.handleLongRunningJob,
                                                   (job, Jobs, j_lock,))

                job.report_timer.start()
                timeout_timer.start()
                try:
                    self.run(job)  # Hand execution over to derived scheduler
                finally:
                    # Always disarm both timers, even when run() raises, so they
                    # cannot fire against a job that is no longer executing
                    timeout_timer.cancel()
                    job.report_timer.cancel()

                # Recover worker count before attempting to queue more jobs
                with self.slot_lock:
                    self.slots_in_use = max(0, self.slots_in_use - job.getSlots())

                # All done
                with j_lock:
                    job.setStatus(job.finished)

                with self.activity_lock:
                    self.__active_jobs.remove(job)

            # Not enough slots to run the job...
            else:
                # ...currently, place back on hold before placing it back into the queue
                if not job.isFinished():
                    with j_lock:
                        job.setStatus(job.hold)
                    sleep(.1)

            # Job is done (or needs to re-enter the queue)
            self.queueJobs(Jobs, j_lock)

        except Exception:
            print('runWorker Exception: %s' % (traceback.format_exc()))
            self.killRemaining()

        except KeyboardInterrupt:
            self.killRemaining(keyboard=True)
def execute(self) -> typing.Optional[int]:
    """Execute the transfer.

    Validates the arguments, loads the sample sheet, builds the transfer jobs
    and runs them either serially (``num_parallel_transfers == 0``) or on a
    thread pool.  Afterwards the landing zone is optionally validated and moved.

    :return: The non-zero result of ``check_args`` if argument validation
        fails, ``None`` otherwise.
    :raises: Any exception raised by a transfer job.  Failures in the parallel
        branch are re-raised after all workers finished, matching the serial
        branch, so a failed transfer never reaches the validate-and-move step.
    """
    # Validate arguments
    res = self.check_args(self.args)
    if res:  # pragma: nocover
        return res

    # Logger
    logger.info("Starting cubi-tk snappy %s", self.command_name)
    logger.info(" args: %s", self.args)

    # Fix for ngs_mapping & variant_calling vs step
    if self.step_name is None:
        self.step_name = self.args.step

    # Find biomedsheet file
    biomedsheet_tsv = get_biomedsheet_path(
        start_path=self.args.base_path, uuid=self.args.destination
    )

    # Extract library names from sample sheet
    sheet = load_sheet_tsv(biomedsheet_tsv, self.args.tsv_shortcut)
    library_names = list(
        self.yield_ngs_library_names(
            sheet=sheet, min_batch=self.args.first_batch, max_batch=self.args.last_batch
        )
    )
    logger.info("Libraries in sheet:\n%s", "\n".join(sorted(library_names)))

    lz_uuid, transfer_jobs = self.build_jobs(library_names)
    logger.debug(
        "Transfer jobs:\n%s", "\n".join(job.to_oneline() for job in transfer_jobs)
    )

    if self.fix_md5_files:
        transfer_jobs = self._execute_md5_files_fix(transfer_jobs)

    total_bytes = sum(job.bytes for job in transfer_jobs)
    logger.info(
        "Transferring %d files with a total size of %s",
        len(transfer_jobs),
        sizeof_fmt(total_bytes),
    )

    counter = Value(c_ulonglong, 0)
    with tqdm.tqdm(total=total_bytes, unit="B", unit_scale=True) as t:
        if self.args.num_parallel_transfers == 0:  # pragma: nocover
            for job in transfer_jobs:
                irsync_transfer(job, counter, t)
        else:
            pool = ThreadPool(processes=self.args.num_parallel_transfers)
            pending = [
                pool.apply_async(irsync_transfer, args=(job, counter, t))
                for job in transfer_jobs
            ]
            pool.close()
            pool.join()
            # apply_async stores worker exceptions in the AsyncResult; without
            # get() a failed transfer would go unnoticed
            for result in pending:
                result.get()

    # Validate and move transferred files
    # Behaviour: If flag is True and lz uuid is not None*,
    # it will ask SODAR to validate and move transferred files.
    # (*) It can be None if user provided path
    if lz_uuid and self.args.validate_and_move:
        self.move_landing_zone(lz_uuid=lz_uuid)
    else:
        logger.info(
            "Transferred files will \033[1mnot\033[0m be automatically moved in SODAR."
        )

    logger.info("All done")
    return None
def _pred_eval_per_gpu(gpu_num, key_predictors, cur_predictors, test_datas, imdb, cfg,
                       vis, thresh, logger, ignore_cache):
    """Run pred_eval once per GPU index and return the results in index order."""
    if gpu_num == 1:
        return [
            pred_eval(0, key_predictors[0], cur_predictors[0], test_datas[0], imdb, cfg,
                      vis, thresh, logger, ignore_cache),
        ]

    from multiprocessing.pool import ThreadPool

    pool = ThreadPool(processes=gpu_num)
    try:
        pending = [
            pool.apply_async(pred_eval,
                             args=(i, key_predictors[i], cur_predictors[i], test_datas[i],
                                   imdb, cfg, vis, thresh, logger, ignore_cache))
            for i in range(gpu_num)
        ]
    finally:
        pool.close()
        pool.join()
    # get() re-raises any exception that occurred inside a worker
    return [job.get() for job in pending]


def pred_eval_multiprocess(gpu_num, key_predictors, cur_predictors, test_datas, imdb, cfg,
                           vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """Evaluate predictions on `gpu_num` devices and log the evaluation summary.

    pred_eval is run once per GPU index (inline for a single GPU, on a thread
    pool otherwise).  When cfg.TEST.SEQ_NMS is False the collected results are
    passed to imdb.evaluate_detections_multiprocess; otherwise pred_eval_seqnms
    is run once per GPU index in a process pool and imdb.do_python_eval_gen
    produces the summary.

    The summary is logged through `logger` when one is given.  Nothing is
    returned.
    """
    # `== False` is kept on purpose: a value such as None must still select the
    # seq-NMS path, which `not cfg.TEST.SEQ_NMS` would change.
    seq_nms_disabled = cfg.TEST.SEQ_NMS == False  # noqa: E712

    res = _pred_eval_per_gpu(gpu_num, key_predictors, cur_predictors, test_datas, imdb,
                             cfg, vis, thresh, logger, ignore_cache)

    if seq_nms_disabled:
        info_str = imdb.evaluate_detections_multiprocess(res)
    else:
        from multiprocessing import Pool

        pool = Pool(processes=gpu_num)
        try:
            jobs = [apply_async(pool, pred_eval_seqnms, (i, imdb)) for i in range(gpu_num)]
            # The return values are not used; get() waits for each job and
            # surfaces worker failures
            for job in jobs:
                job.get()
        finally:
            # Release the worker processes instead of leaking them
            pool.close()
            pool.join()
        info_str = imdb.do_python_eval_gen(gpu_num)

    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
def fold_and_score_pipeline(data):
    '''
    Fold and score the candidates of every beam described in `data`.

    Keys read from `data`:
        processing_args  -- mapping providing 'snr_cutoff' and 'cand_limit_per_beam'
        base_output_dir  -- directory created for the outputs
        processing_id    -- integer used to name the temporary directory
        data             -- {'pointings': [{'utc_start', 'id', 'beams': [
                               {'id', 'name', 'data_products': [{'filename'}]}]}]}

    For each beam the candidate csv is untarred, candidates are filtered by
    S/N and beam id, one prepfold command per candidate is executed on a
    thread pool, and the scoring script is run on the results.

    Returns the list of output data product dicts.

    NOTE(review): the sys.exit(0) after scoring terminates the process during
    the first beam, so the tar/cleanup steps and the return statement are
    currently never reached -- confirm whether that exit is intentional.
    '''
    tstart = time.time()
    output_dps = []

    processing_args = data['processing_args']
    output_dir = data['base_output_dir']
    processing_id = data['processing_id']

    # Make output dir
    try:
        subprocess.check_call("mkdir -p %s" % (output_dir), shell=True)
    except (subprocess.CalledProcessError, OSError):
        # `mkdir -p` does not fail for an existing directory, so reaching this
        # handler means a real failure; execution continues as it did before
        log.info("Already made subdirectory")

    # Make temporary folder to keep any temporary outputs
    tmp_dir = '/beeond/PROCESSING/TEMP/%d' % processing_id
    try:
        subprocess.check_call("mkdir -p %s" % (tmp_dir), shell=True)
    except (subprocess.CalledProcessError, OSError):
        log.info("Already made subdirectory")

    # Get the beam info
    for pointing in data["data"]["pointings"]:
        utc_start = pointing['utc_start']
        for beam in pointing["beams"]:
            input_fil_list = []
            # Reset per beam so a beam without a tarball cannot silently reuse
            # the tarball of the previous beam
            tarred_csv = None
            for dp in beam["data_products"]:
                if '.fil' in dp["filename"]:
                    input_fil_list.append(dp["filename"])
                elif '.tar.gz' in dp['filename']:
                    tarred_csv = dp["filename"]
            if tarred_csv is None:
                raise ValueError("No .tar.gz candidate file found for beam %s" % beam["name"])

            beam_ID = int(beam["id"])
            beam_name = beam["name"]

            input_fil_list.sort()
            input_filenames = ' '.join(input_fil_list)

            # Untar csv file
            untar_file(tarred_csv, tmp_dir)
            # Per-beam working directory; the shared tmp_dir is left untouched
            # so the path does not nest deeper with every beam
            beam_tmp_dir = tmp_dir + '/' + os.path.basename(tarred_csv)

            # Read candidate info file into Pandas Dataframe
            cand_file = glob.glob('%s/*good_cands_to_fold_with_beam.csv' % (beam_tmp_dir))[0]
            df = pd.read_csv(cand_file)

            # Select only candidates with corresponding beam id and snr cutoff
            snr_cut_cands = df[df['snr'] > float(processing_args['snr_cutoff'])]
            single_beam_cands = snr_cut_cands[snr_cut_cands['beam_id'] == beam_ID]
            # Reassign rather than sort a filtered frame in place
            single_beam_cands = single_beam_cands.sort_values('snr', ascending=False)

            # Limit number of candidates to fold
            if single_beam_cands.shape[0] > processing_args['cand_limit_per_beam']:
                single_beam_cands_fold_limited = single_beam_cands.head(
                    processing_args['cand_limit_per_beam'])
            else:
                single_beam_cands_fold_limited = single_beam_cands

            # Read parameters and fold
            cand_periods = single_beam_cands_fold_limited['period'].to_numpy()
            cand_accs = single_beam_cands_fold_limited['acc'].to_numpy()
            cand_dms = single_beam_cands_fold_limited['dm'].to_numpy()
            cand_ids = single_beam_cands_fold_limited['cand_id_in_file'].to_numpy()
            xml_files = single_beam_cands_fold_limited['file'].to_numpy()

            # Choose first element. If filtered right, there should be just one xml filename throughout!
            tree = ET.parse(xml_files[0])
            root = tree.getroot()

            tsamp = float(root.find("header_parameters/tsamp").text)
            fft_size = float(root.find('search_parameters/size').text)
            no_of_samples = int(root.find("header_parameters/nsamples").text)

            mod_periods = []
            pdots = []
            for period, acc in zip(cand_periods, cand_accs):
                Pdot = a_to_pdot(period, acc)
                mod_periods.append(
                    period_modified(period, Pdot, no_of_samples, tsamp, fft_size))
                pdots.append(Pdot)

            cand_mod_periods = np.asarray(mod_periods, dtype=float)

            mask_path = '/beegfs/PROCESSING/TRAPUM/RFIFIND_masks/Fermi_409chans_mask/Fermi_beam0_052838_20200704_rfifind.mask'

            # Parallel process the folds: build one prepfold command per candidate
            command_list = []
            for mod_period, acc, pdot, dm, cand_id in zip(
                    cand_mod_periods, cand_accs, pdots, cand_dms, cand_ids):
                output_name = "%s_%s_candidate_no_%03d_dm_%.2f_acc_%.2f" % (
                    beam_name, utc_start, cand_id, dm, acc)
                script = "prepfold -ncpus 1 -nsub 256 -mask %s -noxwin -topo -p %s -pd %s -dm %s %s -o %s" % (
                    mask_path, str(mod_period), str(pdot), str(dm), input_filenames, output_name)
                command_list.append(script)

            pool = ThreadPool(multiprocessing.cpu_count())
            for command in command_list:
                pool.apply_async(execute_command, args=(command, beam_tmp_dir))
            pool.close()
            pool.join()

            log.info("Folding done for all candidates. Scoring all candidates...")
            subprocess.check_call(
                "python2 webpage_score.py --in_path=%s" % beam_tmp_dir, shell=True)
            log.info("Scoring done...")

            # NOTE(review): this exit makes everything below unreachable; it is
            # kept because removing it would enable the file-removal steps
            sys.exit(0)

            # Create tar file of tmp directory in output directory
            subprocess.check_call("rm *.csv", shell=True, cwd=beam_tmp_dir)  # Remove the csv files
            log.info("Tarring up all folds and the score file")
            tar_name = os.path.basename(output_dir) + "folds_and_scores.tar.gz"
            make_tarfile(output_dir, beam_tmp_dir, tar_name)
            log.info("Tarred")

            # Remove contents in temporary directory
            remove_dir(beam_tmp_dir)
            log.info("Removed temporary files")

            # Add tar file to dataproduct
            dp = dict(type="fold_tar_file",
                      filename=tar_name,
                      directory=output_dir,
                      beam_id=beam_ID,
                      pointing_id=pointing["id"],
                      metainfo=json.dumps("tar_file:folded_archives"))
            output_dps.append(dp)

    tend = time.time()
    print("Time taken is : %f s" % (tend - tstart))

    return output_dps
class Scheduler(MooseObject): """ Base class for handling jobs asynchronously. To use this class, call .schedule() and supply a list of testers to schedule. Each group of testers supplied will begin running immediately. Syntax: .schedule([list of tester objects]) A list of testers will be added to a queue and begin calling their derived run method. You can continue to add more testers to the queue in this fashion. Once you schedule all the testers you wish to test, call .waitFinish() to wait until all testers have finished. """ @staticmethod def validParams(): params = MooseObject.validParams() params.addRequiredParam('average_load', 64.0, "Average load to allow") params.addRequiredParam('max_processes', None, "Hard limit of maxium processes to use") return params def __init__(self, harness, params): MooseObject.__init__(self, harness, params) ## The test harness to run callbacks on self.harness = harness # Retrieve and store the TestHarness options for use in this object self.options = harness.getOptions() # The Scheduler class can be initialized with no "max_processes" argument and it'll default # to a soft limit. If however a max_processes is passed we'll treat it as a hard limit. # The difference is whether or not we allow single jobs to exceed the number of slots. 
if params['max_processes'] == None: self.available_slots = 1 self.soft_limit = True else: self.available_slots = params['max_processes'] # hard limit self.soft_limit = False # Requested average load level to stay below self.average_load = params['average_load'] # The time the status queue reported no activity to the TestHarness self.last_reported = clock() # A set containing jobs that have been reported self.jobs_reported = set([]) # Initialize run_pool based on available slots self.run_pool = ThreadPool(processes=self.available_slots) # Initialize status_pool to only use 1 process (to prevent status messages from getting clobbered) self.status_pool = ThreadPool(processes=1) # Slot Lock when processing resource allocations self.slot_lock = threading.Lock() # DAG Lock when processing the DAG self.dag_lock = threading.Lock() # Workers in use (single job might request multiple slots) self.slots_in_use = 0 # Jobs waiting to finish (includes actively running jobs) self.job_queue_count = 0 # Set containing our TesterData containers. We use this in the event of a KeyboardInterrupt to # iterate over and kill any subprocesses self.tester_datas = set([]) def killRemaining(self): """ Method to kill any running subprocess started by the Scheduler. This also closes the status pool to prevent further statuses from printing to the screen. """ self.run_pool.close() self.status_pool.close() for tester_data in self.tester_datas: tester_data.killProcess() self.job_queue_count = 0 def run(self, job_container): """ Call derived run method """ return def skipPrereqs(self): """ Method to return boolean to skip dependency prerequisites checks. """ if self.options.ignored_caveats: if 'all' in self.options.ignored_caveats or 'prereq' in self.options.ignored_caveats: return True return False def processDownstreamTests(self, job_container): """ Method to discover and delete downstream jobs due to supplied job failing. 
""" with self.dag_lock: failed_job_containers = set([]) tester = job_container.getTester() job_dag = job_container.getDAG() if (tester.isFinished() and not tester.didPass() and not tester.isSilent() and not self.skipPrereqs()) \ or (self.options.dry_run and not tester.isSilent()): # Ask the DAG to delete and return the downstream jobs associated with this job failed_job_containers.update(job_dag.delete_downstreams(job_container)) for failed_job in failed_job_containers: tester = failed_job.getTester() tester.setStatus('skipped dependency', tester.bucket_skip) return failed_job_containers def buildDAG(self, job_container_dict, job_dag): """ Build the DAG and catch any failures. """ failed_or_skipped_testers = set([]) # Create DAG independent nodes for tester_name, job_container in job_container_dict.iteritems(): tester = job_container.getTester() # If this tester is not runnable, continue to the next tester if tester.getRunnable(self.options): job_dag.add_node_if_not_exists(job_container) else: failed_or_skipped_testers.add(tester) continue # Create edge nodes for tester_name, job_container in job_container_dict.iteritems(): tester = job_container.getTester() # Add the prereq node and edges for prereq in tester.getPrereqs(): try: # Try to produce a KeyError and capture an unknown dependency job_container_dict[prereq] # Try to produce either a cyclic or skipped dependency error using the DAG's # built-in exception methods job_dag.add_edge(job_container_dict[prereq], job_container) # Skipped Dependencies except dag.DAGEdgeIndError: if not self.skipPrereqs(): tester.setStatus('skipped dependency', tester.bucket_skip) failed_or_skipped_testers.add(tester) # Add the parent node / dependency edge to create a functional DAG now that we have caught # the skipped dependency (needed for discovering race conditions later on) job_dag.add_node_if_not_exists(job_container_dict[prereq]) job_dag.add_edge(job_container_dict[prereq], job_container) # Cyclic Failure except 
dag.DAGValidationError: tester.setStatus('Cyclic or Invalid Dependency Detected!', tester.bucket_fail) failed_or_skipped_testers.add(tester) # Unknown Dependency Failure except KeyError: tester.setStatus('unknown dependency', tester.bucket_fail) failed_or_skipped_testers.add(tester) # Skipped/Silent/Deleted Testers fall into this catagory, caused by 'job_container' being skipped # during the first iteration above except dag.DAGEdgeDepError: pass # With a working DAG created above (even a partial one), discover race conditions with remaining runnable # testers. failed_or_skipped_testers.update(self.checkRaceConditions(job_dag)) return failed_or_skipped_testers def checkRaceConditions(self, dag_object): """ Return a set of failing testers exhibiting race conditions with their output file. """ failed_or_skipped_testers = set([]) # clone the dag so we can operate destructively on the cloned dag dag_clone = dag_object.clone() while dag_clone.size(): output_files_in_dir = set() # Get a list of concurrent job containers concurrent_jobs = dag_clone.ind_nodes() for job_container in concurrent_jobs: tester = job_container.getTester() output_files = tester.getOutputFiles() # check if we have colliding output files if len(output_files_in_dir.intersection(set(output_files))): # Fail this concurrent group of testers for this_job in concurrent_jobs: tester = this_job.getTester() tester.setStatus('OUTFILE RACE CONDITION', tester.bucket_fail) failed_or_skipped_testers.add(tester) # collisions detected, move on to the next set break output_files_in_dir.update(output_files) # Delete this group of job containers and allow the loop to continue for job_container in concurrent_jobs: dag_clone.delete_node(job_container) return failed_or_skipped_testers def schedule(self, testers): """ Schedule supplied list of testers for execution. 
""" # If any threads caused an exception, we have already closed down the queue and need to # not schedule any more jobs if self.run_pool._state: return # Instance the DAG class so we can share it amongst all the TesterData containers job_dag = dag.DAG() non_runnable_jobs = set([]) name_to_job_container = {} # Increment our simple queue count with the number of testers the scheduler received with self.slot_lock: self.job_queue_count += len(testers) # Create a local dictionary of tester names to job containers. Add this dictionary to a # set. We will use this set as a way to gain access to their methods. for tester in testers: name_to_job_container[tester.getTestName()] = TesterData(tester, job_dag, self.options) self.tester_datas.add(name_to_job_container[tester.getTestName()]) # Populate job_dag with testers. This method will also return any testers which caused failures # while building the DAG. skipped_or_failed_testers = self.buildDAG(name_to_job_container, job_dag) # Create a set of failing job containers for failed_tester in skipped_or_failed_testers: non_runnable_jobs.add(name_to_job_container[failed_tester.getTestName()]) # Iterate over the jobs in our non_runnable_jobs and handle any downstream jobs affected by # 'job'. These will be our 'skipped dependency' tests. for job in non_runnable_jobs.copy(): additionally_skipped = self.processDownstreamTests(job) non_runnable_jobs.update(additionally_skipped) job_dag.delete_node_if_exists(job) # Get a count of all the items still in the DAG. 
These will be the jobs that ultimately are queued runnable_jobs = job_dag.size() # Make sure we didn't drop a tester somehow if len(non_runnable_jobs) + runnable_jobs != len(testers): raise SchedulerError('Runnable tests in addition to Skipped tests does not match total scheduled test count!') # Assign a status thread to begin work on any skipped/failed jobs self.queueJobs(status_jobs=non_runnable_jobs) # Build our list of runnable jobs and set the tester's status to queued job_list = [] if runnable_jobs: job_list = job_dag.ind_nodes() for job_container in job_list: tester = job_container.getTester() tester.setStatus('QUEUED', tester.bucket_pending) # Queue runnable jobs self.queueJobs(run_jobs=job_list) def waitFinish(self): """ Block while the job queue is not empty. Once empty, this method will begin closing down the thread pools and perform a join. Once the last thread exits, we return from this method. There are two thread pools in play; the Tester pool which is performing all the tests, and the Status pool which is handling the printing of tester statuses. Because the Status pool will always have the last item needing to be 'printed', we close and join the Tester pool first, and then we do the same to the Status pool. 
""" while self.job_queue_count > 0: sleep(0.5) self.run_pool.close() self.run_pool.join() self.status_pool.close() self.status_pool.join() def handleLongRunningJobs(self, job_container): """ Handle jobs that have not reported in alotted time """ if job_container not in self.jobs_reported: tester = job_container.getTester() tester.setStatus('RUNNING...', tester.bucket_pending) self.queueJobs(status_jobs=[job_container]) # Restart the reporting timer for this job job_container.report_timer = threading.Timer(float(tester.getMinReportTime()), self.handleLongRunningJobs, (job_container,)) job_container.report_timer.start() def handleTimeoutJobs(self, job_container): """ Handle jobs that have timed out """ tester = job_container.getTester() tester.setStatus('TIMEOUT', tester.bucket_fail) job_container.killProcess() def getLoad(self): """ Method to return current load average """ loadAverage = 0.0 try: loadAverage = os.getloadavg()[0] except AttributeError: pass # getloadavg() not available in this implementation of os return loadAverage def satisfyLoad(self): """ Method for controlling load average """ while self.slots_in_use > 1 and self.getLoad() >= self.average_load: sleep(1.0) def reserveSlots(self, job_container): """ Method which allocates resources to perform the job. Returns bool if job should be allowed to run. 
""" tester = job_container.getTester() # comply with load average if self.options.load: self.satisfyLoad() with self.slot_lock: can_run = False if self.slots_in_use + tester.getProcs(self.options) <= self.available_slots: can_run = True # Check for insufficient slots -soft limit # TODO: Create a unit test for this case elif tester.getProcs(self.options) > self.available_slots and self.soft_limit: tester.specs.addParam('caveats', ['OVERSIZED'], "") can_run = True # Check for insufficient slots -hard limit (skip this job) # TODO: Create a unit test for this case elif tester.getProcs(self.options) > self.available_slots and not self.soft_limit: tester.setStatus('insufficient slots', tester.bucket_skip) can_run = False if can_run: self.slots_in_use += tester.getProcs(self.options) return can_run def getNextJobGroup(self, job_container): """ Method to delete current finished job from the DAG and return the next list of individually runnable jobs. """ with self.dag_lock: job_dag = job_container.getDAG() next_job_list = [] # Delete this job from the shared DAG job_dag.delete_node(job_container) # Get next available job list concurrent_jobs = job_dag.ind_nodes() for next_job_container in concurrent_jobs: queued_tester = next_job_container.getTester() # Verify this job is not already running/pending/skipped if queued_tester.isInitialized(): # Set this next new job to pending so as to prevent this job from being launched a second time queued_tester.setStatus('QUEUED', queued_tester.bucket_pending) next_job_list.append(next_job_container) return next_job_list def queueJobs(self, status_jobs=[], run_jobs=[]): """ Method to control which thread pool jobs enter. 
Syntax: To have a job(s) display its current status to the screen: .queueJobs(status_jobs=[job_container_list] To begin running job(s): .queueJobs(run_jobs=[job_container_list] """ for job_container in run_jobs: if not self.run_pool._state: self.run_pool.apply_async(self.runWorker, (job_container,)) for job_container in status_jobs: if not self.status_pool._state: self.status_pool.apply_async(self.statusWorker, (job_container,)) def statusWorker(self, job_container): """ Method the status_pool calls when an available thread becomes ready """ # Wrap entire statusWorker thread inside a try/exception to catch thread errors try: tester = job_container.getTester() # If the job is still running for a long period of time and we have not reported # this same job alread, report it now. if tester.isPending(): if clock() - self.last_reported >= float(tester.getMinReportTime()) and job_container not in self.jobs_reported: # Inform the TestHarness of a long running test (RUNNING...) self.harness.handleTestStatus(job_container) # ...And then set the finished caveat now that the running status has printed tester.specs.addParam('caveats', ['FINISHED'], "") # Add this job to the reported container so it does not happen again self.jobs_reported.add(job_container) # Job is 'Pending', but is under the threshold to be reported (return now so # last_reported time does not get updated). This will ensure that if nothing # has happened between 'now' and another occurrence of our thread timer event # we do report it. 
else: return else: # All other statuses are sent unmolested self.harness.handleTestStatus(job_container) # Decrement the job queue count now that this job has finished if tester.isFinished(): with self.slot_lock: self.job_queue_count -= 1 # Record current reported time only if it is an activity the user will see if not tester.isSilent() or not tester.isDeleted(): self.last_reported = clock() except Exception as e: print 'statusWorker Exception: %s' % (e) self.killRemaining() def runWorker(self, job_container): """ Method the run_pool calls when an available thread becomes ready """ # Wrap the entire runWorker thread inside a try/exception to catch thread errors try: tester = job_container.getTester() # Check if there are enough resources to run this job if self.reserveSlots(job_container): # Start long running timer job_container.report_timer = threading.Timer(float(tester.getMinReportTime()), self.handleLongRunningJobs, (job_container,)) job_container.report_timer.start() # Start timeout timer timeout_timer = threading.Timer(float(tester.getMaxTime()), self.handleTimeoutJobs, (job_container,)) timeout_timer.start() # Call the derived run method self.run(job_container) # Stop timers now that the job has finished on its own job_container.report_timer.cancel() timeout_timer.cancel() # Derived run needs to set a non-pending status of some sort. if tester.isPending(): raise SchedulerError('Derived Scheduler can not return a pending status!') # Determin if this job creates any skipped dependencies (if it failed), and send # this new list of jobs to the status queue to be printed. 
possibly_skipped_job_containers = self.processDownstreamTests(job_container) possibly_skipped_job_containers.add(job_container) self.queueJobs(status_jobs=possibly_skipped_job_containers) # Get next job list next_job_group = self.getNextJobGroup(job_container) # Recover worker count before attempting to queue more jobs with self.slot_lock: self.slots_in_use = max(0, self.slots_in_use - tester.getProcs(self.options)) # Queue this new batch of runnable jobs self.queueJobs(run_jobs=next_job_group) # Not enough slots to run the job, currently else: # There will never be enough slots to run this job (insufficient slots) if tester.isFinished(): failed_downstream = self.processDownstreamTests(job_container) failed_downstream.add(job_container) self.queueJobs(status_jobs=failed_downstream) # There are no available slots, currently. Place back in queue, and sleep for a bit else: self.queueJobs(run_jobs=[job_container]) sleep(0.3) except Exception as e: print 'runWorker Exception: %s' % (e) self.killRemaining()
def execute_sqls_threaded(self, sql_queries, thread_pool_size=5):
    """
    Execute a batch of SQL queries concurrently on a thread pool.

    Every query is handed to
    ``self.start_query_execution_and_wait_for_completion`` exactly once. A
    query counts as failed when that call raises, returns ``None``, or returns
    a mapping whose ``"SUCCESS"`` entry equals ``False``.

    Parameters:
        sql_queries         array of SQL queries to execute
        thread_pool_size    pool size to use; clamped to the range
                            [1, len(sql_queries)]. MAX a/c limit in PROD is 50
                            so it is recommended to keep it around 2-5.

    Returns:
        True if ``sql_queries`` is empty or every query succeeded.

    Raises:
        Exception: with the message "Operation had errors" when at least one
            query failed.
    """
    if len(sql_queries) == 0:
        return True

    start_time = time.time()
    operations = len(sql_queries)
    pool_size = min(max(thread_pool_size, 1), operations)

    def _run_safely(sql):
        # Capture worker exceptions as data so that one failing query cannot
        # abort the result iteration (and force the whole batch to be re-run).
        try:
            return True, self.start_query_execution_and_wait_for_completion(sql)
        except Exception as exc:
            return False, exc

    # Make the Pool of workers
    pool = ThreadPool(pool_size)
    print("Using pool size of {}".format(pool_size))

    count = 0
    failed_count = 0
    try:
        for finished, r in pool.imap_unordered(_run_safely, sql_queries):
            if not finished:
                print(str(r))
                failed_count += 1
            else:
                try:
                    # `== False` is kept on purpose: only an explicit False
                    # marks a failure, other falsy values do not.
                    if r is None or ("SUCCESS" in r and r["SUCCESS"] == False):  # noqa: E712
                        failed_count += 1
                    else:
                        print(r["QUERY"])
                        count += 1
                except Exception as e:
                    # Malformed result (not a mapping, missing "QUERY", ...)
                    print("#", str(e))
                    failed_count += 1
            sys.stderr.write(
                '\r{0:%} completed {1}, failed {2}, TOTAL: {3}'.format((count * 1.0 / operations), count,
                                                                       failed_count, operations))
    finally:
        # close the pool and wait for the work to finish, even on error
        pool.close()
        pool.join()

    print("test_threaded_metric_log --- %s seconds ---for %s get ops using %s threads" % (
        (time.time() - start_time), operations, pool_size))
    print("total: " + str(operations) + ", failed: " + str(failed_count))

    if count == operations:
        print("Operation successful")
        return True

    print("Operation had errors")
    raise Exception("Operation had errors")
class OrderedEnqueuer(SequenceEnqueuer):
    """Builds a Enqueuer from a Sequence.

    Used in `fit_generator`, `evaluate_generator`, `predict_generator`.

    Arguments:
        sequence: A `keras.utils.data_utils.Sequence` object.
        use_multiprocessing: use multiprocessing if True, otherwise threading
        shuffle: Whether to shuffle the data at the beginning of each epoch.
    """

    def __init__(self, sequence, use_multiprocessing=False, shuffle=False):
        self.sequence = sequence
        self.use_multiprocessing = use_multiprocessing
        self.shuffle = shuffle
        self.workers = 0
        self.executor = None
        self.queue = None
        self.run_thread = None
        self.stop_signal = None

    def is_running(self):
        """Return True once `start()` has been called and `stop()` has not."""
        return self.stop_signal is not None and not self.stop_signal.is_set()

    def start(self, workers=1, max_queue_size=10):
        """Start the handler's workers.

        Arguments:
            workers: number of worker threads
            max_queue_size: queue size
                (when full, workers could block on `put()`)
        """
        if self.use_multiprocessing:
            self.executor = multiprocessing.Pool(workers)
        else:
            self.executor = ThreadPool(workers)
        self.queue = queue.Queue(max_queue_size)
        self.stop_signal = threading.Event()
        self.run_thread = threading.Thread(target=self._run)
        self.run_thread.daemon = True
        self.run_thread.start()

    def _run(self):
        """Submits requests to the executor and queues the `Future` objects."""
        sequence = list(range(len(self.sequence)))
        while True:
            if self.shuffle:
                random.shuffle(sequence)
            for i in sequence:
                # Checked before every submission so `stop()` ends the thread.
                if self.stop_signal.is_set():
                    return
                self.queue.put(
                    self.executor.apply_async(get_index,
                                              (self.sequence, i)), block=True)
            self.sequence.on_epoch_end()

    def get(self):
        """Creates a generator to extract data from the queue.

        Skip the data if it is `None`.

        Yields:
            Tuples (inputs, targets)
                or (inputs, targets, sample_weights)

        Raises:
            Any exception raised while fetching a batch; the enqueuer is
            stopped before the exception is propagated.
        """
        try:
            while self.is_running():
                inputs = self.queue.get(block=True).get()
                if inputs is not None:
                    yield inputs
        except Exception:
            self.stop()
            # Re-raise the original error. Raising StopIteration from inside a
            # generator is turned into RuntimeError by PEP 479 (Python 3.7+)
            # and silently hid the real failure on older interpreters.
            raise

    def stop(self, timeout=None):
        """Stops running threads and wait for them to exit, if necessary.

        Should be called by the same thread which called `start()`.

        Arguments:
            timeout: maximum time to wait on `thread.join()`
        """
        self.stop_signal.set()
        # Empty the queue and wake a producer that may be blocked in `put()`
        # so the run thread can observe the stop signal.
        with self.queue.mutex:
            self.queue.queue.clear()
            self.queue.unfinished_tasks = 0
            self.queue.not_full.notify()
        self.executor.close()
        self.executor.join()
        self.run_thread.join(timeout)
def convert_dataset(self, dataset, to_format, local_path, conversion_func=None, filters=None,
                    annotation_filter=None):
    """
    Convert the annotations of an entire dataset to another format.

    For ``to_format`` equal to 'coco' (case-insensitive) the work is delegated
    to ``__convert_dataset_to_coco`` and its result is returned. Otherwise one
    conversion task per item is submitted to a thread pool, and the converted
    files are written under ``<local_path>/<to_format>``, mirroring the layout
    of ``<local_path>/json``.

    :param dataset: dataset entity to convert
    :param to_format: name of the target format
    :param local_path: local directory for the downloaded and converted files
    :param conversion_func: Custom conversion service
    :param filters: optional filters applied when listing the dataset items
    :param annotation_filter: optional; when given, annotations are not
        downloaded up front and each item is handled by the converter's
        filtered-annotation method
    :return: the COCO result for 'coco', otherwise None
    """
    if to_format.lower() == 'coco':
        return self.__convert_dataset_to_coco(
            dataset=dataset,
            local_path=local_path,
            filters=filters,
            annotation_filter=annotation_filter)

    num_workers = 6
    assert isinstance(dataset, entities.Dataset)
    self.dataset = dataset

    # download annotations
    if annotation_filter is None:
        dataset.download_annotations(local_path=local_path, overwrite=True)

    local_annotations_path = os.path.join(local_path, "json")
    output_annotations_path = os.path.join(local_path, to_format)
    pages = dataset.items.list(filters=filters)

    # if yolo - create labels file
    if to_format == 'yolo':
        labels = [label.tag for label in dataset.labels]
        with open(os.path.join(local_path, '{}.names'.format(dataset.name)), 'w') as fp:
            for label in labels:
                fp.write("{}\n".format(label))

    pbar = tqdm.tqdm(total=pages.items_count)
    # The pool is created last so that a failure above cannot leak it.
    pool = ThreadPool(processes=num_workers)
    try:
        for page in pages:
            for item in page:
                # create input annotations json
                in_filepath = os.path.join(local_annotations_path, item.filename[1:])
                in_filepath = os.path.splitext(in_filepath)[0] + '.json'

                save_to = os.path.dirname(
                    in_filepath.replace(local_annotations_path, output_annotations_path))
                os.makedirs(save_to, exist_ok=True)

                # A fresh converter per item, configured from this instance
                converter = utilities.Converter()
                converter.dataset = self.dataset
                converter.save_to_format = self.save_to_format
                converter.xml_template_path = self.xml_template_path

                if annotation_filter is None:
                    method = converter.convert_file
                else:
                    method = converter.__save_filtered_annotations_and_convert

                # NOTE(review): the AsyncResult is discarded, so an exception
                # raised inside a worker is not reported here.
                pool.apply_async(func=method,
                                 kwds={
                                     "to_format": to_format,
                                     "from_format": 'dataloop',
                                     "file_path": in_filepath,
                                     "save_locally": True,
                                     "save_to": save_to,
                                     'conversion_func': conversion_func,
                                     'item': item,
                                     'pbar': pbar,
                                     'filters': annotation_filter
                                 })
    finally:
        # Always wait for submitted tasks and release the pool and the bar,
        # even when listing or submission fails part-way through.
        pool.close()
        pool.join()
        pool.terminate()
        pbar.close()
def cli():
    """
    Command-line entry point: expand the requested CIDR network(s) into host
    addresses, probe every host on a thread pool and print a summary.

    Networks are read from ``options.input_file`` (one per line) when it is
    given, otherwise from the first positional argument. Results collected in
    the global ``output_list`` are optionally written, sorted, to
    ``options.output_file``. Exits with status 1 on any input, output or
    validation error and on keyboard interrupt.
    """
    global options
    global output_list

    def hosts_of(cidr):
        # Validate one CIDR string and return its host addresses as strings;
        # prints an error and exits with status 1 when it is not usable.
        if '/' not in cidr:
            print('\nerror: %s does not appear to be in CIDR format' % cidr)
            sys.exit(1)
        try:
            net = ipaddr.IPv4Network(cidr)
        except (ipaddr.AddressValueError, ipaddr.NetmaskValueError):
            print('\nerror: %s is not a valid network' % cidr)
            sys.exit(1)
        return [str(host) for host in net.iterhosts()]

    output_list = []
    ips = []
    (options, args) = get_parsed_args()

    if options.input_file is None:
        # Get network from command line
        ips = hosts_of(args[0])
    else:
        # Generate ips from file
        try:
            with open(options.input_file, 'r') as handle:
                entries = handle.read().splitlines()
        except Exception as ex:
            print(ex)
            sys.exit(1)
        for entry in entries:
            ips += hosts_of(entry)

    print('Scanning %d hosts...\n' % len(ips))

    # Create thread pool of workers
    pool = ThreadPool(processes=options.workers)
    try:
        # Waiting with a (one month) timeout instead of indefinitely is what
        # lets a keyboard interrupt be caught while the scan is running.
        pending = pool.map_async(work_work, ips)
        pending.get(2592000)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print('Aborting.')
        sys.exit(1)

    save_requested = options.output_file is not None

    # Save ips to file if needed
    if save_requested:
        try:
            with open(options.output_file, 'w') as handle:
                # Sorting on the packed binary form orders addresses
                # numerically rather than as text.
                output_list.sort(key=inet_aton)
                handle.writelines('%s\n' % ip for ip in output_list)
        except Exception as ex:
            print(ex)
            sys.exit(1)

    print('\nFinished: %d hosts scanned' % len(ips))
    summary = 'Not responding hosts: %d' if options.reverse else 'Alive hosts: %d'
    print(summary % len(output_list))
    if save_requested:
        print("\nIPs list saved to '%s'" % options.output_file)
def __convert_dataset_to_coco(self, dataset: entities.Dataset, local_path, filters=None, annotation_filter=None):
    """
    Convert a whole dataset to a single COCO json file.

    Annotations are downloaded to ``local_path``, every item is converted by
    ``__single_item_to_coco`` on a thread pool, and the combined result is
    written to ``<local_path>/coco.json``.

    :param dataset: dataset entity to convert
    :param local_path: local directory for the downloaded annotations and the
        resulting ``coco.json``
    :param filters: optional filters applied when listing the dataset items
    :param annotation_filter: optional; passed through to the per-item converter
    :return: dict with the keys 'images', 'annotations' and 'categories'
    """
    pages = dataset.items.list(filters=filters)
    dataset.download_annotations(local_path=local_path)
    path_to_dataloop_annotations_dir = os.path.join(local_path, 'json')

    # Category ids are positions in the sorted unique label list; the status
    # labels below are left out, so ids may be non-contiguous.
    class_list = np.unique(np.array([label.tag for label in dataset.labels]))
    label_to_id = {
        name: i for i, name in enumerate(class_list)
        if name not in ["done", 'completed', 'approved']
    }
    categories = [{'id': i, 'name': name} for name, i in label_to_id.items()]

    # Workers fill these slots by item_id; untouched slots stay None.
    images = [None] * pages.items_count
    converted_annotations = [None] * pages.items_count

    pbar = tqdm.tqdm(total=pages.items_count)
    # The pool is created last so that a failure above cannot leak it.
    pool = ThreadPool(processes=11)
    try:
        all_items = (item for page in pages for item in page)
        for item_id, item in enumerate(all_items):
            # NOTE(review): the AsyncResult is discarded, so an exception
            # raised inside a worker is not reported here.
            pool.apply_async(func=self.__single_item_to_coco,
                             kwds={
                                 'item': item,
                                 'images': images,
                                 'path_to_dataloop_annotations_dir': path_to_dataloop_annotations_dir,
                                 'item_id': item_id,
                                 'converted_annotations': converted_annotations,
                                 'annotation_filter': annotation_filter,
                                 'label_to_id': label_to_id,
                                 'pbar': pbar
                             })
    finally:
        # Always wait for submitted tasks and release the pool and the bar,
        # even when iteration or submission fails part-way through.
        pool.close()
        pool.join()
        pool.terminate()
        pbar.close()

    total_converted_annotations = list()
    for ls in converted_annotations:
        if ls is not None:
            total_converted_annotations += ls

    coco_json = {
        'images': [image for image in images if image is not None],
        'annotations': total_converted_annotations,
        'categories': categories
    }

    with open(os.path.join(local_path, 'coco.json'), 'w+') as f:
        json.dump(coco_json, f)

    return coco_json
class LocalDaskExecutor(Executor):
    """
    Executor that evaluates `dask.delayed` objects locally with one of dask's
    local schedulers.

    Args:
        - scheduler (str): name of the local dask scheduler; "threads",
            "processes" and "synchronous" (and a few aliases of each) are
            accepted. Defaults to "threads".
        - **kwargs (Any): extra keyword arguments applied through
            `dask.config.set` for as long as the executor is started
    """

    def __init__(self, scheduler: str = "threads", **kwargs: Any):
        self.scheduler = self._normalize_scheduler(scheduler)
        self.dask_config = kwargs
        self._pool = None  # type: Optional[multiprocessing.pool.Pool]
        super().__init__()

    @staticmethod
    def _normalize_scheduler(scheduler: str) -> str:
        """Translate a scheduler name or alias (any case) to its canonical name."""
        scheduler = scheduler.lower()
        canonical_names = {
            "threads": "threads",
            "threading": "threads",
            "processes": "processes",
            "multiprocessing": "processes",
            "sync": "synchronous",
            "synchronous": "synchronous",
            "single-threaded": "synchronous",
        }
        canonical = canonical_names.get(scheduler)
        if canonical is None:
            raise ValueError(f"Unknown scheduler {scheduler!r}")
        return canonical

    def __getstate__(self) -> dict:
        # The pool is replaced by None in the pickled state.
        return {**self.__dict__, "_pool": None}

    def __setstate__(self, state: dict) -> None:
        self.__dict__.update(state)

    def _interrupt_pool(self) -> None:
        """Try to interrupt every task running in the backing thread pool."""
        if self.scheduler != "threads" or self._pool is None:
            return

        # `ThreadPool.terminate()` only keeps new tasks from starting; tasks
        # that are already running carry on. On CPython an exception can be
        # injected into each worker thread instead. It surfaces the next time
        # the thread touches the Python API, so a task blocked inside a C
        # extension is not interrupted right away - there is no good way
        # around that.
        import platform

        if platform.python_implementation() != "CPython":
            self.logger.warning(
                "Interrupting a running threadpool is only supported in CPython, "
                "all currently running tasks will continue to completion")
            return

        self.logger.info(
            "Attempting to interrupt and cancel all running tasks...")

        import sys
        import ctypes

        # signature of this method changed in python 3.7
        id_type = ctypes.c_ulong if sys.version_info >= (3, 7) else ctypes.c_long
        set_async_exc = ctypes.pythonapi.PyThreadState_SetAsyncExc

        for worker in self._pool._pool:  # type: ignore
            set_async_exc(id_type(worker.ident),
                          ctypes.py_object(KeyboardInterrupt))

    @contextmanager
    def start(self) -> Iterator:
        """
        Context manager that prepares the executor for running tasks.

        While active, the dask configuration given at construction is applied,
        a result-caching callback is registered and, unless the scheduler is
        "synchronous", a worker pool is created. The pool is terminated and
        joined on exit; running thread tasks are additionally interrupted when
        the block is left through an exception.
        """
        # import dask here to reduce prefect import times
        import dask.config
        from dask.callbacks import Callback
        from dask.system import CPU_COUNT

        class PrefectCallback(Callback):
            def __init__(self):  # type: ignore
                self.cache = {}

            def _start(self, dsk):  # type: ignore
                # Replace keys whose value was already computed with that value.
                for key in set(dsk).intersection(self.cache):
                    dsk[key] = self.cache[key]

            def _posttask(self, key, value, dsk, state, id):  # type: ignore
                self.cache[key] = value

        with PrefectCallback(), dask.config.set(**self.dask_config):
            pool = None
            if self.scheduler != "synchronous":
                worker_count = dask.config.get("num_workers", None) or CPU_COUNT
                if self.scheduler == "threads":
                    from multiprocessing.pool import ThreadPool

                    pool = ThreadPool(worker_count)
                else:
                    from dask.multiprocessing import get_context

                    pool = get_context().Pool(worker_count)
            self._pool = pool

            exiting_early = False
            try:
                yield
            except BaseException:
                exiting_early = True
                raise
            finally:
                if self._pool is not None:
                    self._pool.terminate()
                    if exiting_early:
                        self._interrupt_pool()
                    self._pool.join()
                    self._pool = None

    def submit(self, fn: Callable, *args: Any, extra_context: dict = None,
               **kwargs: Any) -> "dask.delayed":
        """
        Wrap a function call in a `dask.delayed` object for later execution.

        Args:
            - fn (Callable): function that is being submitted for execution
            - *args (Any): arguments to be passed to `fn`
            - extra_context (dict, optional): extra information about the
                submitted task; used to derive the dask key name
            - **kwargs (Any): keyword arguments to be passed to `fn`

        Returns:
            - dask.delayed: a `dask.delayed` object that represents the
                computation of `fn(*args, **kwargs)`
        """
        # import dask here to reduce prefect import times
        import dask

        context = extra_context or {}
        key = _make_task_key(**context)
        extra_kwargs = {} if key is None else {"dask_key_name": key}
        return dask.delayed(fn, pure=False)(*args, **kwargs, **extra_kwargs)

    def wait(self, futures: Any) -> Any:
        """
        Compute a (potentially nested) collection of `dask.delayed` objects
        and return their values. Blocks until the computation is complete.

        Args:
            - futures (Any): iterable of `dask.delayed` objects to compute

        Returns:
            - Any: an iterable of resolved futures
        """
        # import dask here to reduce prefect import times
        import dask

        computed = dask.compute(futures, scheduler=self.scheduler, pool=self._pool)
        return computed[0]