def worker(self, db, lista):
    """Search for plugins online in parallel and build the policy plugin list.

    Every entry of ``lista`` is passed to ``onlinePluginSearch`` through a
    worker pool.  Each plugin found is stored with ``db.updateCache`` and
    collected, in result order, into a comma-separated string.

    :param db: object exposing ``updateCache(key, plugin)``.
    :param lista: iterable of items handed to ``onlinePluginSearch``.
    :returns: comma-separated plugin ids; ``19506,10287,12634`` is always
        appended at the end.
    """
    # WARNING (original author): on a fibre connection up to 20 workers run
    # without errors; on ADSL use at most 4.
    processes = 5
    pool = Pool(processes)
    try:
        # Each result is indexed as (key, [plugins found for that key]).
        pluglist = pool.map(onlinePluginSearch, lista)
    finally:
        # Release the workers even when a search raised.
        pool.close()
        pool.join()

    # Flatten the results, updating the cache as each plugin is seen.
    found = []
    for item in pluglist:
        for plug in item[1]:
            db.updateCache(item[0], plug)
            found.append(str(plug))

    # Report the real number of plugins found; the previous
    # ``count(',') + 1`` was one too high because of the trailing comma.
    print("Number of available pflugins: %s" % len(found))
    print("Adding to policy plugins: 19506,10287,12634 for credential checks and ping target.")
    # These three plugins are always added to verify the target is alive.
    return ",".join(found + ["19506", "10287", "12634"])
def demo(args):
    """
    Exercise the logging helpers end to end.

    Parses the ``--verbose``/``-v`` and ``--quiet``/``-q`` counters from
    *args*, switches to the resulting log level, emits one message through
    each logging helper, restores the previous level and finally logs eight
    messages from a thread pool.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", "-v", action='count', default=ENV_VERBOSITY)
    parser.add_argument("--quiet", "-q", action='count', default=0)
    args = parser.parse_args(args)

    requested = verbosity_to_level(args.verbose - args.quiet)
    info("new log level: %s" % (requested,))
    previous = set_loglevel(requested)
    info("old level was: %s" % (previous,))

    info("printing some messages with different log levels")
    samples = (
        (spam, "rofl"),
        (dbg, "wtf?"),
        (info, "foo"),
        (warn, "WARNING!!!!"),
        (err, "that didn't go so well"),
        (crit, "pretty critical, huh?"),
    )
    for emit, text in samples:
        emit(text)

    info("restoring old loglevel")
    set_loglevel(previous)
    info("old loglevel restored")

    info("running some threaded stuff")
    workers = ThreadPool()
    for number in range(8):
        workers.apply_async(info, ("async message #%d" % number,))
    workers.close()
    workers.join()
def update(args=None):
    """Update every listed project that reports being behind.

    Projects come from ``list_projects(False, args.dir)``.  When ``args.j``
    is truthy the projects are handed to a pool of that many workers;
    otherwise they are processed one after another in the calling thread.
    """
    def worker(project):
        # Nothing to do for projects that are already up to date.
        if not project.is_behind():
            return
        project.update()
        print("{} updated".format(project.name))

    projects = list_projects(False, args.dir)
    print("Update in progress...")
    jobs = args.j
    if not jobs:
        for project in projects:
            worker(project)
    else:
        pool = Pool(jobs)
        for project in projects:
            pool.apply_async(worker, (project,))
        pool.close()
        pool.join()
    print("Update done")
def main():
    """Run every tale through a thread pool sized from the configuration.

    The pool size is read from the ``pool_size`` entry of
    ``tcfg['Workers']`` and defaults to 10.  Previously a second, default
    sized ``ThreadPool()`` replaced the configured one, so the setting was
    ignored and the first pool was never closed.
    """
    pool = ThreadPool(processes=int(tcfg['Workers'].get('pool_size', 10)))
    try:
        # Run the Tales
        pool.map(worker, tales)
    finally:
        # Always release the worker threads, even if a tale raised.
        pool.close()
        pool.join()
def local_job_runner(cmds_list, num_threads, throw_error=True):
    """
    Execute a list of cmds locally using a thread pool with at most
    num_threads threads, and wait for all jobs to finish before returning.

    Each command is run through ``backticks(cmd, merge_stderr=True)``, whose
    result is indexed as ``(output, return_code)``.  A command counts as
    failed when its return code is non-zero, or when running it raises
    ``subprocess.CalledProcessError``.

    Parameters:
        cmds_list - cmds that will be executed in the ThreadPool
        num_threads - number of threads used in the ThreadPool
        throw_error - when True, raise RuntimeError if any cmd failed;
                      when False, return the list of failed cmds instead.

    Returns:
        The list of failed cmds (empty when everything succeeded).

    Raises:
        RuntimeError - if throw_error is True and at least one cmd failed.
    """
    def run_cmd_in_shell(cmd):
        # Turn a raised CalledProcessError into the same (output, code)
        # shape as a normal result so one failing command cannot discard
        # the results of all the others.
        try:
            return backticks(cmd, merge_stderr=True)
        except subprocess.CalledProcessError as exc:
            return exc.output, exc.returncode or 1

    pool = ThreadPool(processes=num_threads)
    try:
        rets = pool.map(run_cmd_in_shell, cmds_list)
    finally:
        pool.close()
        pool.join()

    failures = [(cmd, ret[0]) for cmd, ret in zip(cmds_list, rets) if ret[1] != 0]
    failed_cmds = [cmd for cmd, _ in failures]

    if throw_error and failed_cmds:
        errmsg = "\n".join(["CMD failed: %s, %s" % (cmd, out)
                            for (cmd, out) in failures])
        raise RuntimeError(errmsg)
    return failed_cmds
def handle_noargs(self, **options):
    """Sync the static directory, then process every template.

    Both phases walk a directory tree with ``self.walk_tree`` and hand each
    ``(base, filename)`` pair to a 20-thread pool: the first phase calls
    ``self.sync_file``, the second ``self.handle_template``.  Each phase
    waits for its pool to drain before the next progress message is printed.
    Results of the submitted tasks are not inspected.
    """
    def walk_in_pool(roots, task):
        # One fresh pool per phase; every visited file becomes one task.
        workers = ThreadPool(20)

        def submit(base, filename):
            workers.apply_async(task, args=[base, filename])

        self.walk_tree(roots, submit)
        workers.close()
        workers.join()

    mimetypes.init()

    locked_print("===> Syncing static directory")
    # Sync every file in the static media dir with S3
    walk_in_pool([conf.SIMPLESTATIC_DIR], self.sync_file)
    locked_print("===> Static directory syncing complete")

    locked_print("===> Compressing and uploading CSS and JS")
    # Iterate over every template, looking for SimpleStaticNode
    walk_in_pool(list(settings.TEMPLATE_DIRS), self.handle_template)
    locked_print("===> Finished compressing and uploading CSS and JS")
def main(dir_path, outfile_path, is_journal=True):
    """Map over the files of *dir_path* in 20 slices, then reduce the result.

    The directory listing is split into ``pn`` index ranges ``[beg, end)``.
    Each range is passed to ``job_map`` on its own thread together with a
    shared ``ret`` dict, which is finally handed to ``job_reduce`` along
    with *outfile_path*.

    :param dir_path: directory whose entries are processed.
    :param outfile_path: forwarded to ``job_reduce``.
    :param is_journal: forwarded unchanged to every ``job_map`` call.
    """
    pn = 20
    flst = os.listdir(dir_path)
    total = len(flst)
    arglst = []
    ret = dict()
    for i in range(pn):
        beg = int(math.ceil(float(total) / pn * i))
        end = int(math.ceil(float(total) / pn * (i + 1)))
        # Clamp the outer boundaries so float rounding cannot push the first
        # or last slice outside the listing.  These checks used to compare
        # the builtin ``id`` instead of ``i`` and therefore never fired.
        if i == 0:
            beg = 0
        if i == pn - 1:
            end = total
        arglst.append([dir_path, is_journal, beg, end, i, ret])
    pool = ThreadPool(pn)
    pool.map(job_map, arglst)
    pool.close()
    pool.join()
    print(80 * '=')
    print('[acmdl]: map finished')
    print(80 * '=')
    job_reduce(ret, outfile_path)
    print(80 * '=')
    print('[acmdl]: reduce finished')
    print(80 * '=')
    return
def _power_off_and_delete_all_vm_resources(self, api, reservation_details):
    """Process every VM-backed resource of a reservation in parallel.

    For each resource whose details carry ``VmDetails``,
    ``self._power_off_or_delete_deployed_app`` runs on a thread pool with a
    shared lock and a shared ``message_status`` dict.  Every non-``None``
    value those calls return is collected and passed to
    ``api.DeleteResources`` in a single bulk call.
    """
    shared_lock = Lock()
    message_status = {
        "power_off": False,
        "delete": False
    }
    workers = ThreadPool()
    pending = []

    for entry in reservation_details.ReservationDescription.Resources:
        details = api.GetResourceDetails(entry.Name)
        if not details.VmDetails:
            continue
        pending.append(
            workers.apply_async(self._power_off_or_delete_deployed_app,
                                (api, details, shared_lock, message_status)))

    workers.close()
    workers.join()

    outcomes = [job.get() for job in pending]
    resource_to_delete = [outcome for outcome in outcomes if outcome is not None]

    # delete resource - bulk
    if resource_to_delete:
        api.DeleteResources(resource_to_delete)
def read(self, sftppath, localPath = None, numParallelConnections = 1):
    """Download a remote file or directory tree over SFTP.

    Directories are recreated locally and walked recursively.  With
    ``numParallelConnections > 1`` every file is fetched on a thread pool,
    each task opening its own ``SFTPConnection``; otherwise files are
    fetched one by one over the client built from ``self.transport``.

    :param sftppath: remote file or directory to download.
    :param localPath: local directory to download into; defaults to the
        current working directory.
    :param numParallelConnections: pool size for parallel downloads.
    :raises Exception: the first error raised by a parallel download is
        re-raised once all downloads have finished (such errors used to be
        dropped silently).
    """
    # Remote SFTP paths are '/'-separated on every platform, so they must not
    # be joined with the local ``os.path`` rules.
    import posixpath

    if localPath is None:
        localPath = os.getcwd()  # local path - can be changed later
    sftp = paramiko.SFTPClient.from_transport(self.transport)
    parallel = numParallelConnections > 1
    pool = ThreadPool(numParallelConnections) if parallel else None
    pending = []

    def getFile(remote_file, local_file):
        # Every parallel download uses its own connection and always
        # releases it, even when the transfer fails.
        pconnection = SFTPConnection(self.connectionInfo)
        pconnection.connect()
        try:
            psftp = paramiko.SFTPClient.from_transport(pconnection.transport)
            try:
                psftp.get(remote_file, local_file)
            finally:
                psftp.close()
        finally:
            pconnection.close()

    def recursiveRead(remote, local_dir):
        target = os.path.join(local_dir, os.path.basename(remote))
        fileattr = sftp.lstat(remote)
        if not stat.S_ISDIR(fileattr.st_mode):
            # it is a file
            if parallel:
                pending.append(pool.apply_async(getFile, args=(remote, target)))
            else:
                sftp.get(remote, target)
            return
        # it is a directory
        try:
            # an already existing directory is fine (handles race conditions)
            os.makedirs(target)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise
        for entry in sftp.listdir_attr(remote):
            recursiveRead(posixpath.join(remote, entry.filename), target)

    try:
        recursiveRead(sftppath, localPath)
    finally:
        sftp.close()
        if pool is not None:
            pool.close()
            pool.join()

    # Surface the first failure of a parallel download instead of hiding it.
    for job in pending:
        job.get()
def bench_compression_comparison(n_chunks, df_length, append_mul, pool_size, pool_step, repeats, use_raw_lz4, use_HC):
    """Benchmark single-threaded against pooled compression and print both.

    A test payload is built once and repeated ``n_chunks`` times.  The
    single-threaded run provides the baseline mean; every pool size from 2
    to ``pool_size`` (step ``pool_step``) is then measured and printed
    against that baseline.  With ``use_raw_lz4`` an explicit ``ThreadPool``
    is passed to ``bench_multi``; otherwise the pool size is set through
    ``c.set_compression_pool_size`` and no pool is passed.
    """
    payload = construct_test_data(df_length, append_mul)
    chunk_size = len(payload) / 1024 ** 2.0
    chunks = [payload] * n_chunks
    total_size = chunk_size * n_chunks

    # Single threaded
    # ---------------
    baseline = bench_single(repeats, chunks, use_HC)
    print_results(1, chunk_size, n_chunks, total_size, baseline)
    single_mean = np.mean(baseline)

    # Multi-threaded
    # --------------
    for workers in range(2, pool_size + 1, pool_step):
        pool = None
        if use_raw_lz4:
            pool = ThreadPool(workers)
        else:
            c.set_compression_pool_size(workers)
        timings = bench_multi(repeats, chunks, use_HC, pool=pool)
        print_results(workers, chunk_size, n_chunks, chunk_size * n_chunks, timings, compare=single_mean)
        if pool is not None:
            pool.close()
            pool.join()
    print("")
def extract(url):
    """Scrape a proxy listing page and run ``wrapper`` over its candidates.

    A table row is kept when its third cell equals the literal u'高匿名',
    its fourth cell lists ``HTTP`` and its fifth cell lists ``GET``.  For
    each kept row the first, second and seventh cells (the latter without
    its last character) are collected together with a duration taken from
    the eighth cell.

    :param url: address of the listing page.
    :returns: the list produced by mapping ``wrapper`` over the kept rows,
        ``[]`` when no row qualifies, or ``None`` when the page has no
        ``<tbody>`` to parse.
    """
    r = None
    try:
        r = rs.get(url)
        soup = bs(r.text, 'html.parser')
        tr_list = soup.tbody.find_all('tr')
    except AttributeError:
        # ``soup.tbody`` is None when the page has no table body.
        print(r)
        return None

    info_list = []
    for tr in tr_list:
        td_list = tr.find_all('td')
        if td_list[2].text.strip() == u'高匿名' and\
                'HTTP' in td_list[3].text.strip(' ').split(',') and\
                'GET' in td_list[4].text.strip(' ').split(','):
            infos = [
                td_list[0].text.strip(),
                td_list[1].text.strip(),
                td_list[6].text.strip()[:-1],
            ]
            # The numeric part is scaled by 3600 when the cell contains
            # u'小时' and by 60 otherwise.
            # NOTE(review): presumably hours vs. minutes converted to
            # seconds - confirm against the page format.
            scale = 3600 if td_list[7].text.find(u'小时') != -1 else 60
            infos.append(float(td_list[7].text[:-3]) * scale)
            info_list.append(infos)

    # A pool cannot be created with zero workers.
    if not info_list:
        return []

    p = Pool(len(info_list))
    try:
        proxy_list = p.map(wrapper, info_list)
    finally:
        p.close()
        p.join()
    return proxy_list
def resolve_playlist(url):
    """Resolve every video of a YouTube playlist through ``resolve_url``.

    The ``playlistItems`` endpoint is paged through 50 entries at a time
    until no ``nextPageToken`` is returned.  All collected video ids are
    then resolved on a 16-thread pool.

    :param url: value sent as the ``playlistId`` request parameter.
    :returns: the truthy results of ``resolve_url``, in playlist order.
    """
    workers = ThreadPool(processes=16)
    logger.info("Resolving YouTube-Playlist '%s'", url)

    video_ids = []
    token = 'first'  # sentinel: the first request carries no page token
    while token:
        query = dict(playlistId=url, maxResults=50, key=yt_key, part='contentDetails')
        if token != "first":
            logger.debug("Get YouTube-Playlist '%s' page %s", url, token)
            query['pageToken'] = token
        payload = session.get(yt_api_endpoint + 'playlistItems', params=query).json()
        token = payload.get('nextPageToken')
        video_ids.extend(entry['contentDetails']['videoId'] for entry in payload["items"])

    resolved = workers.map(resolve_url, video_ids)
    workers.close()
    return list(filter(None, resolved))
def _run_tests(self):
    """Run all tests without producing a report.

    Tests matching ``self._configured_run_alone_tests`` are held back and
    run one at a time after the pooled tests have finished; all others are
    spawned on the thread pool straight away.  On ``KeyboardInterrupt`` the
    running tests are reaped; a second interrupt, or any other error,
    terminates the pool and re-raises.
    """
    standalone = []
    pool = ThreadPool(self._worker_count)
    try:
        for cmd, options in self._tests:
            opts = options if options else {}
            if not matches(self._configured_run_alone_tests, cmd):
                self._spawn(pool, cmd, opts)
                continue
            standalone.append((cmd, opts))
        pool.close()
        pool.join()

        if standalone:
            util.log("Running tests marked standalone")
            for cmd, opts in standalone:
                self._run_one(cmd, **opts)
    except KeyboardInterrupt:
        try:
            util.log('Waiting for currently running to finish...')
            self._reap_all()
        except KeyboardInterrupt:
            pool.terminate()
            raise
    except BaseException:
        # Anything else: stop the workers, then let the error propagate.
        pool.terminate()
        raise
def thread(host, port, threads, num):
    """Submit ``job(host, port)`` *num* times to a pool of *threads* workers.

    Submissions are spaced one millisecond apart; the call returns once
    every submitted job has finished.  Job results are not inspected.
    """
    workers = ThreadPool(threads)
    job_args = (host, port)
    for _attempt in range(num):
        workers.apply_async(job, job_args)
        time.sleep(0.001)
    workers.close()
    workers.join()
def check_artifact_cache(self, vts):
    """Look up the given VersionedTargetSets in the artifact cache.

    Returns a pair ``(cached, uncached)``: the sets that were satisfied from
    the cache (these do not need building) and the remaining ones, both in
    their original order.  When no cache is configured, or reading from it
    is disabled, every set is reported as uncached.
    """
    if not vts:
        return [], []

    hits = []
    remaining = OrderedSet(vts)
    if not (self._artifact_cache and self.context.options.read_from_artifact_cache):
        return hits, list(remaining)

    def fetch(vt):
        return self._artifact_cache.use_cached_files(vt.cache_key)

    workers = ThreadPool(processes=6)
    outcomes = workers.map(fetch, vts, chunksize=1)
    workers.close()
    workers.join()

    for vt, found in zip(vts, outcomes):
        if not found:
            self.context.log.info('No cached artifacts for %s' % vt.targets)
            continue
        hits.append(vt)
        remaining.discard(vt)
        self.context.log.info('Using cached artifacts for %s' % vt.targets)
        vt.update()
    return hits, list(remaining)
def run(self, suites):
    """Run every suite on a thread pool and return the shared result.

    Plugins may replace the suites (``prepareTest``) and the output stream
    (``setOutputStream``).  The pool size comes from
    ``config.options.thread_pool``; a negative value selects
    ``cpu_count()``.  Submitting and waiting both happen inside
    ``measure_time(result)``; ``finalize`` is called afterwards.
    """
    plugins = self.config.plugins

    replacement = plugins.prepareTest(suites)
    if replacement is not None:
        suites = replacement

    stream = plugins.setOutputStream(self.stream)
    if stream is not None:
        self.stream = stream

    result = self._makeResult()

    configured = self.config.options.thread_pool
    workers = ThreadPool(cpu_count() if configured < 0 else configured)
    with measure_time(result):
        for suite in suites:
            workers.apply_async(suite, args=(result,))
        workers.close()
        workers.join()

    plugins.finalize(result)
    return result
def downloadPDFs(self):
    """Download the file for every PMC id listed in the metadata CSV.

    The id is read from the ninth column of each row of ``self.csvpath``
    and passed to ``self.saveFile`` on a pool of 30 workers.  A single pool
    now serves the whole file; previously a new pool was created and joined
    for every row, so downloads ran one at a time.

    The first row whose id does not contain ``PMC`` aborts the run through
    ``sys.exit(0)``, after the downloads already submitted have finished.
    NOTE(review): the exit status is 0 although this is an error path; it is
    kept unchanged for backward compatibility.
    """
    startTime = time.strftime("%c")
    pool = Pool(30)
    try:
        # Loop through the CSV
        with open(self.csvpath) as f:
            metadata = csv.reader(f, quotechar='"', delimiter=',',
                                  quoting=csv.QUOTE_ALL, skipinitialspace=True)
            for row in metadata:
                pmcid = row[8]
                # Check the input is a PMC ID
                if 'PMC' not in pmcid:
                    print('Something is wrong. '+pmcid+' is not a PMC id')
                    sys.exit(0)
                print('Starting thread for: '+pmcid)
                pool.apply_async(self.saveFile, (pmcid,))
    finally:
        # Also runs on sys.exit: wait for everything submitted so far.
        pool.close()
        pool.join()
    print('Finished downloading all files: start {} end {}.'.format(startTime, time.strftime("%c")))
def check_online_streams(self):
    """Check every stream's online state in parallel and refresh the view.

    Each stream URL is passed to ``self._check_stream`` on a pool of
    ``config.CHECK_ONLINE_THREADS`` workers.  While the checks run, the
    status line shows how many have completed.  Afterwards every stream's
    ``online`` flag is stored, ``all_streams_offline`` is cleared if any
    stream is online, the stream list is refiltered and the autocheck
    timestamp is updated.
    """
    self.all_streams_offline = True
    self.set_status(' Checking online streams...')

    finished = queue.Queue()

    def probe(url):
        # Record completion so the progress display can count it.
        state = self._check_stream(url)
        finished.put(url)
        return state

    pool = Pool(self.config.CHECK_ONLINE_THREADS)
    urls = [stream['url'] for stream in self.streams]
    pending = pool.map_async(probe, urls)

    total = len(self.streams)
    while not pending.ready():
        sleep(0.1)
        self.set_status(' Checked {0}/{1} streams...'.format(finished.qsize(), total))
        self.s.refresh()

    states = pending.get()
    for stream, online in zip(self.streams, states):
        stream['online'] = online
        if online:
            self.all_streams_offline = False

    self.refilter_streams()
    self.last_autocheck = int(time())
    pool.close()
def ons_resolver(key):
    """Fetch the profile ``u/<key>`` from every ONS server and cross-check it.

    Each server in ``ONS_SERVERS`` is queried on its own thread.  Replies
    are compared by the MD5 of their key-sorted JSON form.  When the most
    common reply was given by at least ``SERVER_CONFIRMATION_PERCENTAGE``
    percent of the servers, that reply is returned; otherwise an error
    reply is returned.

    Previously the first reply was returned even when it was not the one
    the servers agreed on.

    :param key: user name looked up as ``'u/' + key``.
    :returns: the agreed profile data, or the value of ``error_reply(...)``.
    """
    def check_server(server):
        try:
            namecoind = NamecoindServer(server, NAMECOIND_PORT,
                                        NAMECOIND_USER, NAMECOIND_PASSWD)
            return namecoind.get_full_profile('u/' + key)
        except Exception:
            # Best effort: an unreachable server counts as an error reply.
            return error_reply("Couldn't connect to namecoind")

    pool = ThreadPool(len(ONS_SERVERS))
    try:
        replies = pool.map(check_server, ONS_SERVERS)
    finally:
        pool.close()
        pool.join()

    # sort_keys makes equal replies hash equally whatever their key order;
    # encoding keeps md5 working where it requires bytes.
    data_hashes = [
        hashlib.md5(json.dumps(reply, sort_keys=True).encode('utf-8')).hexdigest()
        for reply in replies
    ]

    top_hash, max_repeated_times = Counter(data_hashes).most_common(1)[0]
    if max_repeated_times >= (SERVER_CONFIRMATION_PERCENTAGE/100.0) * len(ONS_SERVERS):
        # Return a reply that belongs to the majority, not just the first.
        return replies[data_hashes.index(top_hash)]
    return error_reply("Data from different ONS servers doens't match")
def main():
    """Run ``google_search`` over 1000 generated query URLs on 10 threads.

    The URLs are ``base_url`` followed by the numbers 0..999.  ``range`` is
    used instead of ``xrange`` so the function also runs on Python 3, and
    the pool is released even when a search raises.
    """
    base_url = 'https://www.google.com/?gws_rd=ssl#q='
    urls = [base_url+str(i) for i in range(1000)]
    pool = ThreadPool(10)
    try:
        pool.map(google_search, urls)
    finally:
        pool.close()
        pool.join()
def poll_all(self, recipient_infos):
    """Poll several recipients concurrently and collect their responses.

    Every entry of *recipient_infos* has the form ``(player, type, body)``
    and is passed to ``self.poll`` on its own thread.  Each result is waited
    for with ``self.timeout``; if retrieving it raises (including a
    timeout), the error goes to ``self.log_error`` and ``None`` is stored.

    :param recipient_infos: sequence of ``(player, type, body)`` entries.
    :returns: dict mapping each player to its poll result or ``None``;
        an empty dict when there is nobody to poll.
    """
    results = dict()
    # A pool cannot be created with zero workers, so answer early.
    if not recipient_infos:
        return results

    # One worker per recipient so that all polls run at the same time.
    pool = ThreadPool(processes=len(recipient_infos))
    try:
        pending = dict()
        for info in recipient_infos:
            receiver, rq_type, body = info[0], info[1], info[2]
            pending[receiver] = pool.apply_async(self.poll, (receiver, rq_type, body,))

        # All polls are already running, so waiting on them one after the
        # other still takes only as long as the slowest one.
        for info in recipient_infos:
            receiver = info[0]
            try:
                results[receiver] = pending[receiver].get(timeout=self.timeout)
            except Exception as e:
                self.log_error(e)
                results[receiver] = None
    finally:
        # Clean up the threads, whatever happened above.
        pool.close()
        pool.join()
    return results
try:
    from collections.abc import Iterable as _Iterable
except ImportError:  # Python 2 keeps the ABCs directly in ``collections``
    from collections import Iterable as _Iterable


class parallel_map(_Iterable):
    """Iterable that applies *function* to zipped arguments on a thread pool.

    Work is submitted when the object is constructed.  Iterating yields the
    successful results in completion order.  Once every result has been
    consumed the pool is closed and joined, and the first captured exception
    (if any) is re-raised with its original traceback.

    :param pool_size: number of worker threads (an integer).
    :param function: callable invoked as ``function(*args)``.
    :param iterables: one or more iterables, zipped to form the arguments.
    :raises TypeError: for a non-integer *pool_size*, a non-callable
        *function* or when no iterable is given.
    """

    def __init__(self, pool_size, function, *iterables):
        if not isinstance(pool_size, numbers.Integral):
            raise TypeError('pool_size must be an integer, not ' + repr(pool_size))
        elif not callable(function):
            raise TypeError('function must be callable, not ' + repr(function))
        elif not iterables:
            raise TypeError('missing iterable')
        self.pool = ThreadPool(pool_size)
        self.function = function
        self.results = self.pool.imap_unordered(self.map_function, zip(*iterables))

    def map_function(self, args):
        """Call the function; return ``(True, value)`` or ``(False, exc_info)``."""
        try:
            value = self.function(*args)
        except Exception:
            return False, sys.exc_info()
        return True, value

    def __iter__(self):
        errors = []
        for success, value in self.results:
            if success:
                yield value
            else:
                errors.append(value)
        self.pool.close()
        self.pool.join()
        for error in errors:
            # ``error`` is a (type, value, traceback) triple.
            if sys.version_info[0] >= 3:
                raise error[1].with_traceback(error[2])
            # The three-argument raise only parses on Python 2, hence exec.
            exec('raise error[1], None, error[2]')
def _initialize_clients(self):
    """
    Initialize all clients.

    Every entry of ``self.providers`` is either an already initialized
    ``Client`` or a name from which one is created.  Clients are created in
    parallel on a thread pool.  A client is kept only when it initializes
    without one of ``utils.ERRORS`` and offers both the ``dataselect`` and
    ``station`` services.  Usable clients are stored in
    ``self._initialized_clients`` in provider order.
    """
    logger.info("Initializing FDSN client(s) for %s." % ", ".join(
        _i.base_url if hasattr(_i, "base_url") else _i
        for _i in self.providers))

    def _get_client(client_name):
        # It might already be an initialized client - in that case just
        # use it.
        if isinstance(client_name, Client):
            name, client = client_name.base_url, client_name
        else:
            try:
                this_client = Client(client_name, debug=self.debug)
                name, client = client_name, this_client
            except utils.ERRORS as e:
                if "timeout" in str(e).lower():
                    extra = " (timeout)"
                else:
                    extra = ""
                # ``warning`` replaces the deprecated ``warn`` alias.
                logger.warning("Failed to initialize client '%s'.%s" %
                               (client_name, extra))
                return client_name, None

        services = sorted([_i for _i in client.services.keys()
                           if not _i.startswith("available")])
        if "dataselect" not in services or "station" not in services:
            logger.info("Cannot use client '%s' as it does not have "
                        "'dataselect' and/or 'station' services."
                        % name)
            return name, None
        return name, client

    clients = []
    w = []
    # A pool cannot be created with zero workers, so skip it when there are
    # no providers at all.
    if self.providers:
        p = ThreadPool(len(self.providers))
        try:
            # Catch warnings in the main thread. The catch_warnings()
            # context manager does not reliably work when used in multiple
            # threads.
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                clients = p.map(_get_client, self.providers)
        finally:
            p.close()
            p.join()

    for warning in w:
        logger.debug("Warning during initialization of one of the "
                     "clients: " + str(warning.message))

    clients = {key: value for key, value in clients if value is not None}

    # Write to initialized clients dictionary preserving order. Remember
    # that each passed provider might already be an initialized client
    # instance.
    for client in self.providers:
        if client not in clients and client not in clients.values():
            continue
        name = client.base_url if hasattr(client, "base_url") else client
        self._initialized_clients[name] = clients[name]

    logger.info("Successfully initialized %i client(s): %s." % (
        len(self._initialized_clients),
        ", ".join(self._initialized_clients.keys())))
def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    """Run every suite through ``execute_and_wait_with`` on a thread pool.

    Suites are paired with hosts by ``TestsuitesHosts``.  The hosts are read
    line by line from ``pabot_args["hostsfile"]`` when that key is present;
    otherwise ``None`` is passed as the host list.  While the pool works,
    SIGINT is routed to ``keyboard_interrupt``; the previous handler is
    restored on the way out, also when an error occurs.

    :param datasources: forwarded to every execution.
    :param options: forwarded to every execution.
    :param outs_dir: forwarded to every execution.
    :param pabot_args: dict providing ``processes``, ``command``,
        ``verbose`` and optionally ``hostsfile``.
    :param suite_names: the suites to execute.
    """
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    try:
        pool = ThreadPool(pabot_args['processes'])
        if "hostsfile" in pabot_args:
            # Close the hosts file as soon as it has been read.
            with open(pabot_args["hostsfile"]) as hosts_file:
                hosts = [host.rstrip('\r\n') for host in hosts_file]
        else:
            hosts = None
        if pabot_args["verbose"]:
            print([(suite, host) for (suite, host) in TestsuitesHosts(suite_names, hosts)])
        result = pool.map_async(
            execute_and_wait_with,
            [(datasources, outs_dir, options, suite, pabot_args['command'],
              pabot_args['verbose'], host)
             for (suite, host) in TestsuitesHosts(suite_names, hosts)])
        pool.close()
        while not result.ready():
            # keyboard interrupt is executed in main thread and needs this
            # loop to get time to get executed
            try:
                time.sleep(0.1)
            except IOError:
                keyboard_interrupt()
    finally:
        signal.signal(signal.SIGINT, original_signal_handler)
def _send_some_brokers(self, requests, ignore_errors=True):
    """
    Send one request to each of the given brokers in parallel.

    Args:
        requests (int -> BaseRequest): dictionary mapping integer broker IDs
            to the request object to send to that broker.
        ignore_errors (bool): when True, a ``ConnectionError`` from a broker
            is recorded as ``None``; when False it is re-raised.

    Returns:
        dict (int -> BaseResponse): broker IDs mapped to the response
            received from that broker, or ``None`` for an ignored failure.
    """
    workers = ThreadPool(processes=self.configuration.broker_threads)
    in_flight = {}
    for broker_id, request in requests.items():
        in_flight[broker_id] = workers.apply_async(self._send_to_broker,
                                                   (broker_id, request))
    workers.close()
    workers.join()

    responses = {}
    for broker_id, job in in_flight.items():
        try:
            reply = job.get()
        except ConnectionError:
            if not ignore_errors:
                raise
            # Individual broker failures are OK, represented as None
            reply = None
        responses[broker_id] = reply
    return responses
def get_used_properties(self, set_ids=None, article_ids=None, **filters):
    """
    Yield the names of the flexible properties that are in use.

    ``set_ids`` and ``article_ids`` are added to *filters* as ``sets`` and
    ``ids``.  Every property returned by ``get_properties`` that is not in
    ``ALL_FIELDS`` is checked through ``self._get_used_properties`` on a
    thread pool, each check receiving its own deep copy of the query body.
    Properties whose check returns a truthy value are yielded.
    """
    if set_ids is not None:
        filters["sets"] = set_ids
    if article_ids is not None:
        filters["ids"] = article_ids

    all_properties = self.get_properties()
    flexible_properties = set(all_properties) - set(ALL_FIELDS)

    template = {"query": {"bool": {"must": [
        build_filter(**filters),
        {"exists": {"field": "fakeprop"}}
    ]}}}
    tasks = [(copy.deepcopy(template), name) for name in flexible_properties]

    workers = ThreadPool()
    outcomes = workers.imap(self._get_used_properties, tasks)
    try:
        for in_use, name in zip(outcomes, flexible_properties):
            if in_use:
                yield name
    finally:
        workers.close()
def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    """Run every suite through ``execute_and_wait_with`` on a thread pool.

    When ``pabot_args`` carries a truthy ``vectors`` entry, each suite is
    executed once per vector; otherwise each suite is executed once with
    ``None`` as the vector.  SIGINT is routed to ``keyboard_interrupt``
    while the pool works and the previous handler is restored afterwards.
    """
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    pool = ThreadPool(pabot_args['processes'])

    jobs = []
    if pabot_args.get("vectors"):
        for suite in suite_names:
            for vector in pabot_args['vectors']:
                jobs.append((datasources, outs_dir, options, suite,
                             pabot_args['command'], pabot_args['verbose'], vector))
    else:
        for suite in suite_names:
            jobs.append((datasources, outs_dir, options, suite,
                         pabot_args['command'], pabot_args['verbose'], None))

    result = pool.map_async(execute_and_wait_with, jobs)
    pool.close()
    while not result.ready():
        # keyboard interrupt is executed in main thread and needs this loop
        # to get time to get executed
        try:
            time.sleep(0.1)
        except IOError:
            keyboard_interrupt()
    signal.signal(signal.SIGINT, original_signal_handler)
def _listArtifacts(self, urls, gavs):
    """
    Build maven artifacts from a list of GAVs and locate each of them in
    the first repository that contains it.

    :param urls: repository URLs where the given GAVs can be located
    :param gavs: List of GAVs
    :returns: Dictionary whose keys are MavenArtifact objects and whose
              values are ArtifactSpec objects created from the URL where
              the artifact was found.  GAVs found nowhere are logged with
              a warning and left out.
    """
    located = {}

    def findArtifact(gav):
        artifact = MavenArtifact.createFromGAV(gav)
        for url in urls:
            if maven_repo_util.gavExists(url, artifact):
                # Critical section?
                located[artifact] = ArtifactSpec(url)
                break
        else:
            logging.warning('Artifact %s not found in any url!', artifact)

    workers = ThreadPool(maven_repo_util.MAX_THREADS)
    for gav in gavs:
        workers.apply_async(findArtifact, [gav])

    # Close the pool and wait for the workers to finish
    workers.close()
    workers.join()
    return located
def run(self):
    """Run the experiment once per agent, one worker thread per agent.

    ``self.run_experiement`` is called with ``self.experiment`` and the
    agent index for every index below ``self.num_agents``.  The call
    returns when all agents have finished; task results are not retrieved.
    """
    agent_count = self.num_agents
    workers = ThreadPool(agent_count)
    for agent_index in range(agent_count):
        workers.apply_async(self.run_experiement,
                            args=(self.experiment, agent_index))
    workers.close()
    workers.join()
def cleanup(self, odps):
    """Drop every tracked object and persist whatever could not be dropped.

    Each object in ``self._container`` has ``drop(odps)`` called on a thread
    pool.  Objects dropped successfully are removed from the container.  If
    the container ends up empty, the file ``self._file_name`` is deleted
    (a missing file is ignored); otherwise ``self.dump()`` is called.

    :param odps: passed to every object's ``drop`` method.
    """
    cleaned = []

    def cleaner_thread(obj):
        # Best effort: an object that fails to drop simply stays tracked.
        try:
            obj.drop(odps)
        except Exception:
            return
        cleaned.append(obj)

    # Only spin up worker threads when there is something to clean, and
    # always release them afterwards.
    if self._container:
        pool = ThreadPool(CLEANER_THREADS)
        try:
            pool.map(cleaner_thread, self._container)
        finally:
            pool.close()
            pool.join()

    for obj in cleaned:
        if obj in self._container:
            self._container.remove(obj)

    if not self._container:
        try:
            os.unlink(self._file_name)
        except OSError:
            pass
    else:
        self.dump()
def __convert_dataset_to_coco(self, dataset: entities.Dataset, local_path, filters=None, annotation_filter=None):
    """Convert a dataset to COCO format and write it to ``coco.json``.

    Annotations are downloaded into *local_path*.  A category is created
    for each unique label tag except ``done``, ``completed`` and
    ``approved``.  Every item is converted on a thread pool by
    ``self.__single_item_to_coco``, which receives the shared ``images``
    and ``converted_annotations`` lists together with the item's running
    index.

    :param dataset: dataset to convert.
    :param local_path: directory for the annotations and ``coco.json``.
    :param filters: forwarded to ``dataset.items.list``.
    :param annotation_filter: forwarded to every item conversion.
    :returns: the COCO dict that was written to disk.
    """
    pages = dataset.items.list(filters=filters)
    dataset.download_annotations(local_path=local_path)
    path_to_dataloop_annotations_dir = os.path.join(local_path, 'json')

    class_list = np.unique(np.array([label.tag for label in dataset.labels]))
    excluded = ["done", 'completed', 'approved']
    label_to_id = {}
    for index, name in enumerate(class_list):
        if name not in excluded:
            label_to_id[name] = index
    categories = [{'id': index, 'name': name} for name, index in label_to_id.items()]

    item_total = pages.items_count
    images = [None] * item_total
    converted_annotations = [None] * item_total

    pool = ThreadPool(processes=11)
    pbar = tqdm.tqdm(total=item_total)
    all_items = (item for page in pages for item in page)
    for item_id, item in enumerate(all_items):
        pool.apply_async(func=self.__single_item_to_coco,
                         kwds={
                             'item': item,
                             'images': images,
                             'path_to_dataloop_annotations_dir': path_to_dataloop_annotations_dir,
                             'item_id': item_id,
                             'converted_annotations': converted_annotations,
                             'annotation_filter': annotation_filter,
                             'label_to_id': label_to_id,
                             'pbar': pbar
                         })
    pool.close()
    pool.join()
    pool.terminate()
    pbar.close()

    total_converted_annotations = []
    for per_item in converted_annotations:
        if per_item is None:
            continue
        total_converted_annotations.extend(per_item)

    coco_json = {
        'images': [image for image in images if image is not None],
        'annotations': total_converted_annotations,
        'categories': categories
    }
    with open(os.path.join(local_path, 'coco.json'), 'w+') as f:
        json.dump(coco_json, f)
    return coco_json
class ApiClient(object): """ Generic API client for Swagger client library builds. Swagger generic API client. This client handles the client- server communication, and is invariant across implementations. Specifics of the methods and models for each application are generated from the Swagger templates. NOTE: This class is auto generated by the swagger code generator program. Ref: https://github.com/swagger-api/swagger-codegen Do not edit the class manually. :param configuration: .Configuration object for this client :param header_name: a header to pass when making calls to the API. :param header_value: a header value to pass when making calls to the API. :param cookie: a cookie to include in the header when making calls to the API """ PRIMITIVE_TYPES = (float, bool, bytes, text_type) + integer_types NATIVE_TYPES_MAPPING = { 'int': int, 'long': int if PY3 else long, 'float': float, 'str': str, 'bool': bool, 'date': date, 'datetime': datetime, 'object': object, } def __init__(self, configuration=None, header_name=None, header_value=None, cookie=None): if configuration is None: configuration = Configuration() self.configuration = configuration self.pool = ThreadPool() self.rest_client = RESTClientObject(configuration) self.default_headers = {} if header_name is not None: self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. 
self.user_agent = 'Swagger-Codegen/1.0.0/python' ########### Change # Store last api call metadata self.last_metadata = {} ########### End Change def __del__(self): self.pool.close() self.pool.join() @property def user_agent(self): """User agent for this API client""" return self.default_headers['User-Agent'] @user_agent.setter def user_agent(self, value): self.default_headers['User-Agent'] = value def set_default_header(self, header_name, header_value): self.default_headers[header_name] = header_value ########### Change def metadata_wrapper(fn): """Save metadata of last api call.""" @functools.wraps(fn) def wrapped_f(self, *args, **kwargs): self.last_metadata = {} self.last_metadata["url"] = self.configuration.host + args[0] self.last_metadata["method"] = args[1] self.last_metadata["timestamp"] = time.time() try: return fn(self, *args, **kwargs) except Exception as e: self.last_metadata["exception"] = e raise return wrapped_f def get_last_metadata(self): return self.last_metadata ########### End Change @metadata_wrapper def __call_api(self, resource_path, method, path_params=None, query_params=None, header_params=None, body=None, post_params=None, files=None, response_type=None, auth_settings=None, _return_http_data_only=None, collection_formats=None, _preload_content=True, _request_timeout=None): config = self.configuration # header parameters header_params = header_params or {} header_params.update(self.default_headers) if self.cookie: header_params['Cookie'] = self.cookie if header_params: header_params = self.sanitize_for_serialization(header_params) header_params = dict( self.parameters_to_tuples(header_params, collection_formats)) # path parameters if path_params: path_params = self.sanitize_for_serialization(path_params) path_params = self.parameters_to_tuples(path_params, collection_formats) for k, v in path_params: # specified safe chars, encode everything resource_path = resource_path.replace( '{%s}' % k, quote(str(v), 
safe=config.safe_chars_for_path_param)) # query parameters if query_params: query_params = self.sanitize_for_serialization(query_params) query_params = self.parameters_to_tuples(query_params, collection_formats) # post parameters if post_params or files: post_params = self.prepare_post_parameters(post_params, files) post_params = self.sanitize_for_serialization(post_params) post_params = self.parameters_to_tuples(post_params, collection_formats) # auth setting self.update_params_for_auth(header_params, query_params, auth_settings) # body if body: body = self.sanitize_for_serialization(body) # request url url = self.configuration.host + resource_path # perform request and return response response_data = self.request(method, url, query_params=query_params, headers=header_params, post_params=post_params, body=body, _preload_content=_preload_content, _request_timeout=_request_timeout) self.last_response = response_data return_data = response_data if _preload_content: # deserialize response data if response_type: return_data = self.deserialize(response_data, response_type) else: return_data = None ########### Change self.last_metadata["response"] = response_data self.last_metadata["return_data"] = return_data ########### End Change if _return_http_data_only: return (return_data) else: return (return_data, response_data.status, response_data.getheaders()) def sanitize_for_serialization(self, obj): """ Builds a JSON POST object. If obj is None, return None. If obj is str, int, long, float, bool, return directly. If obj is datetime.datetime, datetime.date convert to string in iso8601 format. If obj is list, sanitize each element in the list. If obj is dict, return the dict. If obj is swagger model, return the properties dict. :param obj: The data to serialize. :return: The serialized form of data. 
""" if obj is None: return None elif isinstance(obj, self.PRIMITIVE_TYPES): return obj elif isinstance(obj, list): return [ self.sanitize_for_serialization(sub_obj) for sub_obj in obj ] elif isinstance(obj, tuple): return tuple( self.sanitize_for_serialization(sub_obj) for sub_obj in obj) elif isinstance(obj, (datetime, date)): return obj.isoformat() if isinstance(obj, dict): obj_dict = obj else: # Convert model obj to dict except # attributes `swagger_types`, `attribute_map` # and attributes which value is not None. # Convert attribute name to json key in # model definition for request. obj_dict = { obj.attribute_map[attr]: getattr(obj, attr) for attr, _ in iteritems(obj.swagger_types) if getattr(obj, attr) is not None } return { key: self.sanitize_for_serialization(val) for key, val in iteritems(obj_dict) } def deserialize(self, response, response_type): """ Deserializes response into an object. :param response: RESTResponse object to be deserialized. :param response_type: class literal for deserialized object, or string of class name. :return: deserialized object. """ # handle file downloading # save response body into a tmp file and return the instance if response_type == "file": return self.__deserialize_file(response) # fetch data from response object try: data = json.loads(response.data) except ValueError: data = response.data return self.__deserialize(data, response_type) def __deserialize(self, data, klass): """ Deserializes dict, list, str into an object. :param data: dict, list or str. :param klass: class literal, or string of class name. :return: object. 
""" if data is None: return None if type(klass) == str: if klass.startswith('list['): sub_kls = re.match('list\[(.*)\]', klass).group(1) return [ self.__deserialize(sub_data, sub_kls) for sub_data in data ] if klass.startswith('dict('): sub_kls = re.match('dict\(([^,]*), (.*)\)', klass).group(2) return { k: self.__deserialize(v, sub_kls) for k, v in iteritems(data) } # convert str to class if klass in self.NATIVE_TYPES_MAPPING: klass = self.NATIVE_TYPES_MAPPING[klass] else: klass = getattr(models, klass) if klass in self.PRIMITIVE_TYPES: return self.__deserialize_primitive(data, klass) elif klass == object: return self.__deserialize_object(data) elif klass == date: return self.__deserialize_date(data) elif klass == datetime: return self.__deserialize_datatime(data) else: return self.__deserialize_model(data, klass) def call_api(self, resource_path, method, path_params=None, query_params=None, header_params=None, body=None, post_params=None, files=None, response_type=None, auth_settings=None, async=None, _return_http_data_only=None, collection_formats=None, _preload_content=True, _request_timeout=None):
def execute_sqls_threaded(self, sql_queries, thread_pool_size=5):
    """Execute a batch of SQL queries concurrently on a thread pool.

    Every query is handed to
    ``self.start_query_execution_and_wait_for_completion`` exactly once and
    the outcomes are tallied as they complete (in arbitrary order).

    A result counts as a failure when it is ``None``, when it contains a
    ``"SUCCESS"`` key equal to ``False``, when it cannot be inspected
    (for example it has no ``"QUERY"`` key), or when the worker raised.

    Parameters:
        sql_queries: sequence of SQL queries to execute.
        thread_pool_size: requested number of worker threads. Values below 1
            are treated as 1, and the pool is never larger than the number
            of queries. The original author recommends 2-5 because of a
            limit of 50 in PROD.

    Returns:
        True if ``sql_queries`` is empty or every query succeeded.

    Raises:
        Exception: with the message "Operation had errors" if at least one
            query failed.
    """
    if len(sql_queries) == 0:
        return True

    start_time = time.time()
    operations = len(sql_queries)
    pool_size = min(max(thread_pool_size, 1), operations)

    # Make the pool of workers.
    pool = ThreadPool(pool_size)
    print("Using pool size of {}".format(pool_size))

    count = 0
    failed_count = 0
    try:
        results = pool.imap_unordered(
            self.start_query_execution_and_wait_for_completion, sql_queries)
        while True:
            # Pull results one at a time: an exception raised by a worker
            # surfaces from next() but the iterator stays usable, so a single
            # failing query does not force the whole batch to be resubmitted.
            try:
                r = next(results)
            except StopIteration:
                break
            except Exception as e:
                print(str(e))
                failed_count += 1
                continue

            try:
                if r is None:
                    failed_count += 1
                elif "SUCCESS" in r and r["SUCCESS"] == False:  # noqa: E712
                    failed_count += 1
                else:
                    print(r["QUERY"])
                    count += 1
                sys.stderr.write(
                    '\r{0:%} completed {1}, failed {2}, TOTAL: {3}'.format(
                        (count * 1.0 / operations), count, failed_count,
                        operations))
            except Exception as e:
                # Malformed result (e.g. not a mapping, or missing "QUERY").
                print("#", str(e))
                failed_count += 1

        print("test_threaded_metric_log --- %s seconds ---for %s get ops using %s threads" % (
            (time.time() - start_time), operations, pool_size))
        print("total: " + str(operations) + ", failed: " + str(failed_count))
    finally:
        # Close the pool and wait for the work to finish, even on error.
        pool.close()
        pool.join()

    if failed_count == 0 and count == operations:
        print("Operation successful")
        return True

    print("Operation had errors")
    raise Exception("Operation had errors")
def runAllRuns(params, threadCount=4):
    """Apply ``execExpr`` to every element of ``params`` on a thread pool.

    :param params: iterable of arguments; each element is passed to
        ``execExpr`` as its single argument.
    :param threadCount: number of worker threads in the pool.
    :return: list with the result of ``execExpr`` for each element of
        ``params``, in input order.
    """
    pool = ThreadPool(threadCount)
    try:
        return pool.map(execExpr, params)
    finally:
        # The original wrote ``pool.join`` without parentheses, which is a
        # no-op attribute access; actually wait for the workers to exit.
        pool.close()
        pool.join()
def prepare_connectivity(self, reservation, cloud_provider_model,
                         storage_client, resource_client, network_client,
                         logger, actions, cancellation_context):
    """Prepare the connectivity resources for a reservation.

    The steps run in this order:

    1. create a resource group named after the reservation id;
    2. start the storage account / key pair creation on a background thread;
    3. look up the management and sandbox vNets by tag and validate them;
    4. create the network security group (NSG);
    5. create the management rules on the NSG;
    6. create the subnet with the NSG;
    7. wait (up to 15 minutes) for the background storage/key pair job.

    Cancellation is checked through ``self.cancellation_service`` between
    the main steps.

    :param cloudshell.cp.azure.models.reservation_model.ReservationModel reservation:
    :param cloudshell.cp.azure.models.azure_cloud_provider_resource_model.AzureCloudProviderResourceModel cloud_provider_model: cloud provider
    :param storage_client:
    :param resource_client:
    :param network_client:
    :param logging.Logger logger:
    :param actions: list[cloudshell.cp.core.models.RequestActionBase]
    :param cancellation_context: cloudshell.shell.core.driver_context.CancellationContext instance
    :return: the value of ``self._prepare_results(create_key_action_result, actions)``
    """

    def abort_if_cancelled():
        self.cancellation_service.check_if_cancelled(cancellation_context)

    cidr = self._validate_request_and_extract_cidr(actions)
    logger.info("Received CIDR {0} from server".format(cidr))

    reservation_id = reservation.reservation_id
    # The resource group and the subnet share the reservation id as name.
    group_name = str(reservation_id)
    subnet_name = group_name
    tags = self.tags_service.get_tags(reservation=reservation)
    create_key_action_result = CreateKeysActionResult()

    # Step 1: resource group.
    logger.info("Creating a resource group: {0} .".format(group_name))
    self.vm_service.create_resource_group(
        resource_management_client=resource_client,
        group_name=group_name,
        region=cloud_provider_model.region,
        tags=tags)
    abort_if_cancelled()

    storage_account_name = self._prepare_storage_account_name(reservation_id)

    # Steps 2+3: storage account and key pairs, created asynchronously.
    pool = ThreadPool()
    storage_job_args = (logger, storage_client, storage_account_name,
                        group_name, cloud_provider_model, tags,
                        cancellation_context, create_key_action_result)
    storage_res = pool.apply_async(self._create_storage_and_keypairs,
                                   storage_job_args)

    management_group = cloud_provider_model.management_group_name
    logger.info(
        "Retrieving MGMT vNet from resource group {} by tag {}={}".format(
            management_group,
            NetworkService.NETWORK_TYPE_TAG_NAME,
            NetworkService.MGMT_NETWORK_TAG_VALUE))
    virtual_networks = self.network_service.get_virtual_networks(
        network_client=network_client,
        group_name=cloud_provider_model.management_group_name)
    abort_if_cancelled()

    management_vnet = self.network_service.get_virtual_network_by_tag(
        virtual_networks=virtual_networks,
        tag_key=NetworkService.NETWORK_TYPE_TAG_NAME,
        tag_value=NetworkService.MGMT_NETWORK_TAG_VALUE)
    self._validate_management_vnet(management_vnet)

    logger.info(
        "Retrieving sandbox vNet from resource group {} by tag {}={}".format(
            cloud_provider_model.management_group_name,
            NetworkService.NETWORK_TYPE_TAG_NAME,
            NetworkService.SANDBOX_NETWORK_TAG_VALUE))
    sandbox_vnet = self.network_service.get_virtual_network_by_tag(
        virtual_networks=virtual_networks,
        tag_key=NetworkService.NETWORK_TYPE_TAG_NAME,
        tag_value=NetworkService.SANDBOX_NETWORK_TAG_VALUE)
    self._validate_sandbox_vnet(sandbox_vnet)

    # Step 4: the NSG object (named by the raw reservation id).
    security_group_name = reservation_id
    logger.info("Creating a network security group '{}' .".format(
        security_group_name))
    nsg_service = self.security_group_service
    network_security_group = nsg_service.create_network_security_group(
        network_client=network_client,
        group_name=group_name,
        security_group_name=security_group_name,
        region=cloud_provider_model.region,
        tags=tags)
    abort_if_cancelled()

    # Step 5: rules on the NSG to create a sandbox.
    logger.info("Creating NSG management rules...")
    self._create_management_rules(
        group_name=group_name,
        management_vnet=management_vnet,
        network_client=network_client,
        sandbox_vnet_cidr=cidr,
        security_group_name=security_group_name,
        additional_mgmt_networks=cloud_provider_model.additional_mgmt_networks,
        logger=logger)
    abort_if_cancelled()

    # Step 6: subnet with the NSG attached.
    self._create_subnet(
        cidr=cidr,
        cloud_provider_model=cloud_provider_model,
        logger=logger,
        network_client=network_client,
        resource_client=resource_client,
        network_security_group=network_security_group,
        sandbox_vnet=sandbox_vnet,
        subnet_name=subnet_name)
    abort_if_cancelled()

    # Wait for all async operations.
    pool.close()
    pool.join()
    # Waits up to 15 minutes; re-raises whatever the storage job raised.
    storage_res.get(timeout=900)

    return self._prepare_results(create_key_action_result, actions)
request = "http://127.0.0.1:8000/" r = requests.put(request, params={ 'short': str(i), 'long': longResource }) except Exception as e: print(e) numWrites = [10, 100, 1000, 4000] with open('./data/varying_writes.tsv', 'wt') as out_file: for i in range(len(numWrites)): pool = Pool(pool_size) t0 = time.time() for j in range(numWrites[i]): pool.apply_async(worker, (j, )) pool.close() pool.join() t1 = time.time() tsv_writer = csv.writer(out_file, delimiter='\t') tsv_writer.writerow([t1 - t0, numWrites[i]]) print("{} writes: {} seconds".format(numWrites[i], t1 - t0))
def convert_dataset(self, dataset, to_format, local_path,
                    conversion_func=None, filters=None,
                    annotation_filter=None):
    """Convert the annotations of an entire dataset to another format.

    COCO conversion is delegated to a dedicated private method. For any
    other format, one conversion job per item is scheduled on a thread pool
    of six workers, and the call returns once all jobs have finished.

    :param dataset: dataset to convert (must be an ``entities.Dataset``
        for non-COCO formats)
    :param to_format: target format name; also used as the output
        sub-directory under ``local_path``
    :param local_path: root directory for downloaded and converted files
    :param conversion_func: custom conversion service, forwarded to each job
    :param filters: optional filters used when listing the dataset items
    :param annotation_filter: optional; when given, annotations are not
        downloaded up front and the filtered conversion method is used
    :return: the result of the COCO conversion for ``'coco'``, else None
    """
    if to_format.lower() == 'coco':
        return self.__convert_dataset_to_coco(
            dataset=dataset,
            local_path=local_path,
            filters=filters,
            annotation_filter=annotation_filter)

    num_workers = 6
    assert isinstance(dataset, entities.Dataset)
    self.dataset = dataset

    # Annotations are only downloaded when no annotation filter is given.
    if annotation_filter is None:
        dataset.download_annotations(local_path=local_path, overwrite=True)

    json_root = os.path.join(local_path, "json")
    output_root = os.path.join(local_path, to_format)
    pool = ThreadPool(processes=num_workers)
    i_item = 0
    pages = dataset.items.list(filters=filters)

    # YOLO additionally needs a labels file, one label tag per line.
    if to_format == 'yolo':
        label_tags = [label.tag for label in dataset.labels]
        names_path = '{}/{}.names'.format(local_path, dataset.name)
        with open(names_path, 'w') as names_file:
            names_file.writelines("{}\n".format(tag) for tag in label_tags)

    pbar = tqdm.tqdm(total=pages.items_count)
    for page in pages:
        for item in page:
            i_item += 1

            # Path of the item's annotation json (item.filename without its
            # first character, extension swapped for ".json").
            stem, _ext = os.path.splitext(
                os.path.join(json_root, item.filename[1:]))
            in_filepath = stem + '.json'

            # Mirror the directory layout under the output root.
            save_to = os.path.dirname(
                in_filepath.replace(json_root, output_root))
            os.makedirs(save_to, exist_ok=True)

            converter = utilities.Converter()
            converter.dataset = self.dataset
            converter.save_to_format = self.save_to_format
            converter.xml_template_path = self.xml_template_path

            if annotation_filter is not None:
                method = converter.__save_filtered_annotations_and_convert
            else:
                method = converter.convert_file

            job_kwargs = {
                "to_format": to_format,
                "from_format": 'dataloop',
                "file_path": in_filepath,
                "save_locally": True,
                "save_to": save_to,
                'conversion_func': conversion_func,
                'item': item,
                'pbar': pbar,
                'filters': annotation_filter,
            }
            pool.apply_async(func=method, kwds=job_kwargs)

    pool.close()
    pool.join()
    pool.terminate()
    pbar.close()
class ApiClient(object): """Generic API client for Swagger client library builds. Swagger generic API client. This client handles the client- server communication, and is invariant across implementations. Specifics of the methods and models for each application are generated from the Swagger templates. NOTE: This class is auto generated by the swagger code generator program. Ref: https://github.com/swagger-api/swagger-codegen Do not edit the class manually. :param configuration: .Configuration object for this client :param header_name: a header to pass when making calls to the API. :param header_value: a header value to pass when making calls to the API. :param cookie: a cookie to include in the header when making calls to the API """ PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types NATIVE_TYPES_MAPPING = { 'int': int, 'long': int if six.PY3 else long, # noqa: F821 'float': float, 'str': str, 'bool': bool, 'date': datetime.date, 'datetime': datetime.datetime, 'object': object, } def __init__(self, configuration=None, header_name=None, header_value=None, cookie=None): if configuration is None: configuration = Configuration() self.configuration = configuration self.pool = ThreadPool() self.rest_client = rest.RESTClientObject(configuration) self.default_headers = {} if header_name is not None: self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. self.user_agent = 'Swagger-Codegen/0.2.3/python' # This is used for detecting for the special case of a path parameter # that is tagged with x-isi-url-encode-path-param (more details in the # __call_api function). 
self.quote_plus_tag = "__x-isi-url-encode-path-param__" self.quote_plus_tag_len = len(self.quote_plus_tag) self.session_expiration = 0 self.inactive_expiration = 0 self.x_csrf_token = None def __del__(self): self.pool.close() self.pool.join() @property def user_agent(self): """User agent for this API client""" return self.default_headers['User-Agent'] @user_agent.setter def user_agent(self, value): self.default_headers['User-Agent'] = value def set_default_header(self, header_name, header_value): self.default_headers[header_name] = header_value def __call_api(self, resource_path, method, path_params=None, query_params=None, header_params=None, body=None, post_params=None, files=None, response_type=None, auth_settings=None, _return_http_data_only=None, collection_formats=None, _preload_content=True, _request_timeout=None): config = self.configuration # header parameters header_params = header_params or {} header_params.update(self.default_headers) if self.cookie: header_params['Cookie'] = self.cookie if header_params: header_params = self.sanitize_for_serialization(header_params) header_params = dict( self.parameters_to_tuples(header_params, collection_formats)) # path parameters if path_params: path_params = self.sanitize_for_serialization(path_params) path_params = self.parameters_to_tuples(path_params, collection_formats) for k, v in path_params: v_str = str(v) # Check for the special case of the # x-isi-url-encode-path-param, which indicates that the # parameter should be encoded with quote_plus in order # to encode the '/' character. 
# check if the first part of v_str matches the tag if v_str[:self.quote_plus_tag_len] == self.quote_plus_tag: # remove "__x-isi-url-encode-path-param__" v_str = v_str[self.quote_plus_tag_len:] # then url-encode with quote_plus replacement = quote_plus(v_str) else: replacement = quote(v_str, safe=config.safe_chars_for_path_param) # specified safe chars, encode everything resource_path = resource_path.replace('{%s}' % k, replacement) # query parameters if query_params: query_params = self.sanitize_for_serialization(query_params) query_params = self.parameters_to_tuples(query_params, collection_formats) # post parameters if post_params or files: post_params = self.prepare_post_parameters(post_params, files) post_params = self.sanitize_for_serialization(post_params) post_params = self.parameters_to_tuples(post_params, collection_formats) # auth setting if not self.configuration.host.startswith('papi://'): self.update_params_for_auth(header_params, query_params, auth_settings) # body if body: body = self.sanitize_for_serialization(body) # request url url = self.configuration.host + resource_path # perform request and return response response_data = self.request(method, url, query_params=query_params, headers=header_params, post_params=post_params, body=body, _preload_content=_preload_content, _request_timeout=_request_timeout) self.last_response = response_data return_data = response_data if _preload_content: # deserialize response data if response_type: return_data = self.deserialize(response_data, response_type) else: return_data = None if _return_http_data_only: return (return_data) else: return (return_data, response_data.status, response_data.getheaders()) def sanitize_for_serialization(self, obj): """Builds a JSON POST object. If obj is None, return None. If obj is str, int, long, float, bool, return directly. If obj is datetime.datetime, datetime.date convert to string in iso8601 format. If obj is list, sanitize each element in the list. 
If obj is dict, return the dict. If obj is swagger model, return the properties dict. :param obj: The data to serialize. :return: The serialized form of data. """ if obj is None: return None elif isinstance(obj, self.PRIMITIVE_TYPES): return obj elif isinstance(obj, list): return [ self.sanitize_for_serialization(sub_obj) for sub_obj in obj ] elif isinstance(obj, tuple): return tuple( self.sanitize_for_serialization(sub_obj) for sub_obj in obj) elif isinstance(obj, (datetime.datetime, datetime.date)): return obj.isoformat() if isinstance(obj, dict): obj_dict = obj else: # Convert model obj to dict except # attributes `swagger_types`, `attribute_map` # and attributes which value is not None. # Convert attribute name to json key in # model definition for request. obj_dict = { obj.attribute_map[attr]: getattr(obj, attr) for attr, _ in six.iteritems(obj.swagger_types) if getattr(obj, attr) is not None } return { key: self.sanitize_for_serialization(val) for key, val in six.iteritems(obj_dict) } def deserialize(self, response, response_type): """Deserializes response into an object. :param response: RESTResponse object to be deserialized. :param response_type: class literal for deserialized object, or string of class name. :return: deserialized object. """ # handle file downloading # save response body into a tmp file and return the instance if response_type == "file": return self.__deserialize_file(response) # fetch data from response object try: data = json.loads(response.data) except ValueError: data = response.data return self.__deserialize(data, response_type) def __deserialize(self, data, klass): """Deserializes dict, list, str into an object. :param data: dict, list or str. :param klass: class literal, or string of class name. :return: object. 
""" if data is None: return None if type(klass) == str: if klass.startswith('list['): sub_kls = re.match('list\[(.*)\]', klass).group(1) return [ self.__deserialize(sub_data, sub_kls) for sub_data in data ] if klass.startswith('dict('): sub_kls = re.match('dict\(([^,]*), (.*)\)', klass).group(2) return { k: self.__deserialize(v, sub_kls) for k, v in six.iteritems(data) } # convert str to class if klass in self.NATIVE_TYPES_MAPPING: klass = self.NATIVE_TYPES_MAPPING[klass] else: klass = getattr(isi_sdk_8_0.models, klass) if klass in self.PRIMITIVE_TYPES: return self.__deserialize_primitive(data, klass) elif klass == object: return self.__deserialize_object(data) elif klass == datetime.date: return self.__deserialize_date(data) elif klass == datetime.datetime: return self.__deserialize_datatime(data) else: return self.__deserialize_model(data, klass) def call_api(self, resource_path, method, path_params=None, query_params=None, header_params=None, body=None, post_params=None, files=None, response_type=None, auth_settings=None, async=None, _return_http_data_only=None, collection_formats=None, _preload_content=True, _request_timeout=None):
def geoparse(self, doc, verbose=False):
    """Main geoparsing function: text in, extracted and resolved entities out.

    Parameters
    ----------
    doc : str or spaCy
        The document to geoparse. Anything without an ``ents`` attribute is
        first run through ``nlp``; objects that already have ``ents`` are
        used as they are.
    verbose : bool
        When False (the default) the entries are passed through
        ``self.clean_proced`` before being returned.

    Returns
    -------
    proced : list of dicts
        One entry per entity from ``self.infer_country``. Entries for which
        a place could be ranked gain the keys ``geo`` and
        ``place_confidence``. An empty list is returned when
        ``infer_country`` yields nothing.
    """
    if not hasattr(doc, "ents"):
        doc = nlp(doc)

    proced = self.infer_country(doc)
    if not proced:
        return []

    # Country-restricted lookups: in parallel when threading is enabled,
    # otherwise one by one.
    if self.threads:
        pool = ThreadPool(len(proced))
        results = pool.map(self.proc_lookup_country, proced)
        pool.close()
        pool.join()
    else:
        # If the confidence is too low, don't use the country info; an
        # empty string stands in for "no result".
        results = [
            self.query_geonames_country(entry['word'],
                                        entry['country_predicted'])
            if entry['country_conf'] > self.country_threshold else ""
            for entry in proced
        ]

    for loc, res in zip(proced, results):
        # Entries without a usable geonames result are left untouched.
        try:
            res['hits']['hits']
        except (TypeError, KeyError):
            continue

        # Pick the best place.
        X, meta = self.features_for_rank(loc, res)
        if X.shape[1] == 0:
            # This happens if there are no results.
            continue

        _tasks, sorted_meta, sorted_X = self.format_for_prodigy(
            X, meta, loc['word'], return_feature_subset=True)

        # Pad the feature rows up to four candidates, then flatten them
        # into the single vector handed to the rank model.
        missing_rows = 4 - sorted_X.shape[0]
        padded = np.pad(sorted_X, ((0, missing_rows), (0, 0)), 'constant')
        prediction = self.rank_model.predict(np.asmatrix(padded.flatten()))

        confidence = prediction.max()
        loc['geo'] = sorted_meta[prediction.argmax()]
        loc['place_confidence'] = confidence

    return proced if verbose else self.clean_proced(proced)
class hxtool_scheduler:
    """Task scheduler built on a polling thread and a thread pool.

    Pending tasks live in ``task_queue`` (task id -> task object). Metadata
    of tasks moved out of the queue lives in ``history_queue`` (task id ->
    metadata), which is capped at ``MAX_HISTORY_QUEUE_LENGTH`` entries.
    ``task_hx_api_sessions`` maps a profile id to the API object used by
    background tasks for that profile.
    """

    def __init__(self, thread_count=None):
        """Create the scheduler; call ``start()`` to begin polling.

        :param thread_count: number of worker threads; defaults to
            ``cpu_count() + 1``.
        """
        self._lock = threading.Lock()
        self.task_queue = {}
        self.history_queue = {}
        self.task_hx_api_sessions = {}
        self._poll_thread = threading.Thread(target=self._scan_task_queue, name="PollThread")
        self._stop_event = threading.Event()
        # Allow for thread oversubscription based on CPU count
        self.thread_count = thread_count or (cpu_count() + 1)
        self.task_threads = ThreadPool(self.thread_count)
        logger.info("Task scheduler initialized.")

    def _scan_task_queue(self):
        """Poll loop: every 0.1s submit the tasks that report ``should_run()``.

        The runnable tasks are snapshotted and submitted under the lock; the
        results are then drained outside the lock until the batch is
        exhausted, a 5 second wait times out, or a stop is requested.
        """
        while not self._stop_event.wait(.1):
            ret = None
            with self._lock:
                ret = self.task_threads.imap_unordered(
                    self._run_task,
                    [_ for _ in self.task_queue.values() if _.should_run()])
            if ret:
                while not self._stop_event.is_set():
                    try:
                        ret.next(timeout=5)
                    except TimeoutError:
                        break
                    except StopIteration:
                        break
                    except Exception as e:
                        logger.error(pretty_exceptions(e))
                        continue

    def _run_task(self, task):
        """Run a single task on a worker thread.

        :return: the value of ``task.run(self)``, or False if it raised.
        """
        ret = False
        task.set_state(TASK_STATE_QUEUED)
        logger.debug("Executing task with id: %s, name: %s.", task.task_id, task.name)
        try:
            ret = task.run(self)
        except Exception as e:
            logger.error(pretty_exceptions(e))
            task.set_state(TASK_STATE_FAILED)
        finally:
            # NOTE(review): returning from ``finally`` also swallows
            # non-``Exception`` errors raised by the task; kept as is.
            return ret

    def _add_task_api_task(self, profile_id, hx_host, hx_port, username, password):
        """Create the API object for a profile and queue its login task."""
        self.task_hx_api_sessions[profile_id] = HXAPI(
            hx_host,
            hx_port=hx_port,
            proxies=hxtool_global.hxtool_config['network'].get('proxies'),
            headers=hxtool_global.hxtool_config['headers'],
            cookies=hxtool_global.hxtool_config['cookies'],
            logger_name=hxtool_logging.getLoggerName(HXAPI.__name__),
            default_encoding=default_encoding)
        api_login_task = hxtool_scheduler_task(profile_id, "Task API Login - {}".format(hx_host), immutable=True)
        api_login_task.add_step(
            hxtool_task_modules.task_api_session_module,
            kwargs={
                'profile_id': profile_id,
                'username': username,
                'password': password
            })
        self.add(api_login_task)

    def start(self):
        """Start the polling thread."""
        self._poll_thread.start()
        logger.info("Task scheduler started with %s threads.", self.thread_count)

    def stop(self):
        """Signal the poll loop to stop, then close and join the thread pool."""
        logger.debug("stop() enter.")
        self._stop_event.set()
        logger.debug("Closing the task thread pool.")
        self.task_threads.close()
        logger.debug("Waiting for running threads to terminate.")
        self.task_threads.join()
        logger.debug("stop() exit.")

    def initialize_task_api_sessions(self):
        """Queue an API login task for every profile with a stored background credential."""
        # Loop through background credentials and start the API sessions
        profiles = hxtool_global.hxtool_db.profileList()
        for profile in profiles:
            task_api_credential = hxtool_global.hxtool_db.backgroundProcessorCredentialGet(profile['profile_id'])
            if task_api_credential:
                try:
                    salt = HXAPI.b64(task_api_credential['salt'], True)
                    iv = HXAPI.b64(task_api_credential['iv'], True)
                    key = crypt_pbkdf2_hmacsha256(salt, TASK_API_KEY)
                    decrypted_background_password = crypt_aes(key, iv, task_api_credential['hx_api_encrypted_password'], decrypt=True)
                    self._add_task_api_task(profile['profile_id'], profile['hx_host'], profile['hx_port'], task_api_credential['hx_api_username'], decrypted_background_password)
                    # Drop the local reference to the clear-text password.
                    decrypted_background_password = None
                except UnicodeDecodeError:
                    logger.error("Please reset the background credential for {} ({}).".format(profile['hx_host'], profile['profile_id']))
            else:
                logger.info("No background credential for {} ({}).".format(profile['hx_host'], profile['profile_id']))

    def add_task_api_session(self, profile_id, hx_host, hx_port, username, password):
        """Store an encrypted background credential and queue the login task."""
        iv = crypt_generate_random(16)
        salt = crypt_generate_random(32)
        key = crypt_pbkdf2_hmacsha256(salt, TASK_API_KEY)
        encrypted_password = crypt_aes(key, iv, password)
        hxtool_global.hxtool_db.backgroundProcessorCredentialCreate(profile_id, username, HXAPI.b64(iv), HXAPI.b64(salt), encrypted_password)
        encrypted_password = None
        self._add_task_api_task(profile_id, hx_host, hx_port, username, password)
        password = None

    def remove_task_api_session(self, profile_id):
        """Remove the stored background credential and log out a valid session."""
        hxtool_global.hxtool_db.backgroundProcessorCredentialRemove(profile_id)
        hx_api_object = self.task_hx_api_sessions.get(profile_id)
        if hx_api_object and hx_api_object.restIsSessionValid():
            hx_api_object.restLogout()
            del self.task_hx_api_sessions[profile_id]

    def logout_task_api_sessions(self):
        """Log out every background API session that has an API object."""
        for hx_api_object in self.task_hx_api_sessions.values():
            if hx_api_object is not None:
                hx_api_object.restLogout()
                hx_api_object = None

    def signal_child_tasks(self, parent_task_id, parent_task_state, parent_stored_result):
        """Forward a parent's state and stored result to every queued task."""
        with self._lock:
            for task_id in self.task_queue:
                self.task_queue[task_id].parent_state_callback(parent_task_id, parent_task_state, parent_stored_result)

    def add(self, task, should_store=True):
        """Queue a task and mark it as scheduled.

        :param task: task object to queue (keyed by ``task.task_id``).
        :param should_store: when True, ``task.store()`` is called.
        :return: the task id.
        """
        with self._lock:
            self.task_queue[task.task_id] = task
            task.set_state(TASK_STATE_SCHEDULED)
            # Note: this must be within the lock otherwise we run into a nasty race condition where the task runs before the stored state is set -
            # with the run lock taking precedence.
            if should_store:
                task.store()
        return task.task_id

    def add_list(self, tasks):
        """Queue every task of ``tasks``; anything that is not a list is ignored."""
        if isinstance(tasks, list):
            for t in tasks:
                self.add(t)

    def remove(self, task_id, delete_children=True):
        """Remove a task from the queue or from the history.

        Queued tasks flagged ``immutable`` are left in place.

        :param task_id: id of the task to remove; falsy ids are ignored.
        :param delete_children: also remove queued and historical tasks
            whose parent id is ``task_id``.
        """
        if task_id:
            with self._lock:
                if delete_children:
                    # We need to make a shallow copy so we don't modify the task_queue while iterating over it
                    for child_task_id in [_.task_id for _ in self.task_queue.values() if _.parent_id == task_id]:
                        self.task_queue[child_task_id].remove()
                        del self.task_queue[child_task_id]
                    for child_task_id in [_['task_id'] for _ in self.history_queue.values() if _['parent_id'] == task_id]:
                        del self.history_queue[child_task_id]
                t = self.task_queue.get(task_id, None)
                if t and not t.immutable:
                    t.remove()
                    del self.task_queue[task_id]
                    t = None
                elif task_id in self.history_queue:
                    del self.history_queue[task_id]

    def get(self, task_id):
        """Return the queued task with this id, or None."""
        with self._lock:
            return self.task_queue.get(task_id, None)

    def move_to_history(self, task_id):
        """Move a task from the queue to the history.

        Only the task's ``metadata()`` is kept. When the history grows past
        ``MAX_HISTORY_QUEUE_LENGTH`` the oldest entry is evicted.
        """
        with self._lock:
            t = self.task_queue.pop(task_id, None)
            if t is not None:
                self.history_queue[task_id] = t.metadata()
                if len(self.history_queue) > MAX_HISTORY_QUEUE_LENGTH:
                    # dict.popitem() is LIFO and would evict the entry that
                    # was just added; drop the oldest entry instead.
                    oldest_task_id = next(iter(self.history_queue))
                    del self.history_queue[oldest_task_id]

    def tasks(self):
        """Return the metadata of all queued tasks followed by the history entries."""
        # Shallow copy to avoid locking
        return [_.metadata() for _ in list(self.task_queue.values())] + list(self.history_queue.values())

    # Load queued tasks from the database
    def load_from_database(self):
        """Queue the tasks stored in the database; the scheduler must be running.

        A stored task that names a parent, whose parent is not marked
        complete and cannot be found in the database, is deleted as an
        orphan. Any exception is logged rather than raised.
        """
        try:
            if self.status():
                tasks = hxtool_global.hxtool_db.taskList()
                for task_entry in tasks:
                    p_id = task_entry.get('parent_id', None)
                    if p_id and (not task_entry['parent_complete'] and not hxtool_global.hxtool_db.taskGet(task_entry['profile_id'], p_id)):
                        logger.warn("Deleting orphan task {}, {}".format(task_entry['name'], task_entry['task_id']))
                        hxtool_global.hxtool_db.taskDelete(task_entry['profile_id'], task_entry['task_id'])
                    else:
                        task = hxtool_scheduler_task.deserialize(task_entry)
                        task.set_stored()
                        # Set should_store to False as we've already been stored, and we skip a needless update
                        self.add(task, should_store=False)
            else:
                logger.warn("Task scheduler must be running before loading queued tasks from the database.")
        except Exception as e:
            logger.error("Failed to load saved tasks from the database. Error: {}".format(pretty_exceptions(e)))

    def status(self):
        """Return True while the polling thread is alive."""
        return self._poll_thread.is_alive()
class WorkerPool:
    """A pool of worker threads.

    The workers are threads and therefore bound by the GIL: CPU-bound work
    gains little from this pool, so use it mainly for IO-bound work.
    """

    def __init__(self, parent_workunit, run_tracker, num_workers,
                 thread_name_prefix):
        self._run_tracker = run_tracker
        self.num_workers = num_workers
        self._shutdown_hooks = []

        # Shutdown must not happen while workchains are pending: they may
        # still need to submit work that the pool does not know about yet.
        # The condition protects self._pending_workchains.
        self._pending_workchains = 0
        self._pending_workchains_cond = threading.Condition()

        self.thread_lock = threading.Lock()
        self.thread_counter = 0

        def name_and_register_worker():
            # Runs once in each worker thread when it starts.
            with self.thread_lock:
                worker_name = "{}-{}".format(thread_name_prefix,
                                             self.thread_counter)
                threading.current_thread().name = worker_name
                self.thread_counter += 1
                # All workers accrue work to the same root.
                self._run_tracker.register_thread(parent_workunit)

        self._pool = ThreadPool(processes=num_workers,
                                initializer=name_and_register_worker)

    def add_shutdown_hook(self, hook):
        """Register a no-argument callable to be invoked by shutdown()."""
        self._shutdown_hooks.append(hook)

    def submit_async_work(self, work, workunit_parent=None, on_success=None,
                          on_failure=None):
        """Submit work to be executed in the background.

        :param work: The work to execute.
        :param workunit_parent: If specified, work is accounted for under
          this workunit.
        :param on_success: If specified, a callable taking a single
          argument, a list with one entry per invocation, in order. Called
          only if all work succeeded; called immediately with an empty list
          when there is nothing to run.
        :param on_failure: If specified, a callable taking a single
          argument, which is an exception thrown in the work.
        :return: `multiprocessing.pool.MapResult`, or None when there is
          nothing to run.

        Don't do work in on_success: it would block the result handling
        thread, which is not a worker and has no logging context etc. Use
        it only to submit further work to the pool.
        """
        # map_async hangs on 0-length iterables, so short-circuit them.
        if work is None or len(work.args_tuples) == 0:
            if on_success:
                on_success([])
            return None

        def run_one(*args):
            self._do_work(work.func,
                          *args,
                          workunit_name=work.workunit_name,
                          workunit_parent=workunit_parent,
                          on_failure=on_failure)

        return self._pool.map_async(run_one,
                                    work.args_tuples,
                                    chunksize=1,
                                    callback=on_success)

    def submit_async_work_chain(self, work_chain, workunit_parent,
                                done_hook=None):
        """Submit a chain of work to be executed serially in the background.

        - work_chain: An iterable of Work instances, invoked one after the
                      other. Each may have a different cardinality. Outputs
                      are not chained into inputs: every instance must
                      already carry its argument tuples. If any work throws,
                      the remaining work in the chain is not attempted.
        - workunit_parent: Work is accounted for under this workunit.
        - done_hook: If not None, invoked with no args after all work is
                     done, or on error.
        """

        def finish():
            if done_hook:
                done_hook()
            with self._pending_workchains_cond:
                self._pending_workchains -= 1
                self._pending_workchains_cond.notify()

        def fail(e):
            finish()
            self._run_tracker.log(Report.ERROR, '{}'.format(e))

        # Nones (and other falsy entries) are dropped defensively: there
        # shouldn't be any, but one slipping through could hang forever.
        remaining = filter(None, work_chain)

        def submit_next():
            try:
                following = next(remaining)
            except StopIteration:
                finish()  # The success case.
                return
            self.submit_async_work(following,
                                   workunit_parent=workunit_parent,
                                   on_success=lambda x: submit_next(),
                                   on_failure=fail)

        with self._pending_workchains_cond:
            self._pending_workchains += 1
        try:
            submit_next()
        except Exception as e:  # Handles errors in the submission code.
            finish()
            self._run_tracker.log(Report.ERROR, '{}'.format(e))
            raise

    def submit_work_and_wait(self, work, workunit_parent=None):
        """Submit work to this pool and block until it completes.

        - work: The work to execute.
        - workunit_parent: If specified, work is accounted for under this
                           workunit.

        Returns a list of return values of each invocation, in order.
        Throws if any invocation does.
        """
        # map hangs on 0-length iterables.
        if work is None or len(work.args_tuples) == 0:
            return []

        def run_one(*args):
            return self._do_work(work.func,
                                 *args,
                                 workunit_name=work.workunit_name,
                                 workunit_parent=workunit_parent)

        pending = self._pool.map_async(run_one, work.args_tuples, chunksize=1)
        # An explicit timeout is required: without one python ignores SIGINT
        # while waiting on a condition variable, so ctrl-c would not work.
        return pending.get(timeout=1000000000)

    def _do_work(self, func, args_tuple, workunit_name, workunit_parent,
                 on_failure=None):
        """Call ``func(*args_tuple)``, inside a new workunit when one is named."""
        try:
            if not workunit_name:
                return func(*args_tuple)
            with self._run_tracker.new_workunit_under_parent(
                    name=workunit_name, parent=workunit_parent):
                return func(*args_tuple)
        except KeyboardInterrupt:
            # A KeyboardInterrupt caught by a worker thread is propagated
            # to the main thread.
            _thread.interrupt_main()
            raise
        except Exception as e:
            if on_failure:
                # The work's workunit is closed by now, so on_failure() may
                # e.g. close an ancestor workunit.
                on_failure(e)
            raise

    def shutdown(self):
        """Wait for pending workchains, stop the pool, then run the hooks."""
        with self._pending_workchains_cond:
            while self._pending_workchains > 0:
                self._pending_workchains_cond.wait()
            self._pool.close()
        self._pool.join()
        for hook in self._shutdown_hooks:
            hook()

    def abort(self):
        """Terminate the pool immediately."""
        self._pool.terminate()
def make_country_features(self, doc, require_maj=False):
    """
    Create features for the country picking model. Function where all the
    individual feature maker functions are called and aggregated.
    (Formerly "process_text")

    Parameters
    -----------
    doc : str or spaCy doc
        Anything without an ``ents`` attribute is first run through the
        module-level ``nlp`` pipeline.
    require_maj : bool
        Not used by this method; kept so existing callers keep working.

    Returns
    -------
    task_list : list of dicts
        Each entry has the word, surrounding text, span, and the country
        picking features. This output can be put into Prodigy for labeling
        almost as-is (the "features" key needs to be renamed "meta" or be
        deleted.) An empty list is returned when no usable place entity is
        found.
    """
    if not hasattr(doc, "ents"):
        doc = nlp(doc)

    # get explicit counts of country names
    ct_mention, ctm_count1, ct_mention2, ctm_count2 = \
        self._feature_country_mentions(doc)

    # pull out the place names, skipping empty ones, non-place labels, and
    # known junk from the skip list (like "Atlantic Ocean")
    ents = []
    for ent in doc.ents:
        stripped = ent.text.strip()
        if not stripped:
            continue
        if ent.label_ not in ["GPE", "LOC", "FAC"]:
            continue
        if stripped in self._skip_list:
            continue
        ents.append(ent)
    if not ents:
        return []

    # Look them up in geonames, either sequentially if no threading, or
    # in parallel if threads.
    if self.threads:
        pool = ThreadPool(len(ents))
        try:
            ent_results = pool.map(self.simple_lookup,
                                   [i.text for i in ents])
        finally:
            # Always release the worker threads, even when a lookup raises.
            pool.close()
            pool.join()
    else:
        ent_results = []
        for ent in ents:
            try:
                result = self.query_geonames(ent.text)
            except ConnectionTimeout:
                # A timed-out lookup contributes an empty result.
                result = ""
            ent_results.append(result)

    task_list = []
    for ent, result in zip(ents, ent_results):
        ent = self.clean_entity(ent)

        # vector for just the solo word
        vp = self._feature_word_embedding(ent)
        try:
            word_vec = vp['country_1']
            wv_confid = float(vp['confid_a'])
        except TypeError:
            # the embedding feature was not subscriptable (e.g. None);
            # NOTE(review): the fallback confidence is the string "0",
            # not a float -- kept as in the original.
            word_vec = ""
            wv_confid = "0"

        # look for explicit mentions of feature names
        class_mention, code_mention = self._feature_location_type_mention(ent)

        # build results-based features
        most_alt = self._feature_most_alternative(result)
        # TODO check if most_common feature really isn't that useful
        most_common = self._feature_most_common(result)
        most_pop = self._feature_most_population(result)
        first_back, second_back = self._feature_first_back(result)

        try:
            maj_vote = Counter([
                word_vec, most_alt, first_back, most_pop, ct_mention
            ]).most_common()[0][0]
        except Exception as e:
            print("Problem taking majority vote: ", ent, e)
            maj_vote = ""
        if not maj_vote:
            maj_vote = ""

        # We only want all this junk for the labeling task. We just want to
        # go straight to features and the model when in production.
        try:
            start = ent.start_char
            end = ent.end_char
            iso_label = maj_vote
            try:
                text_label = self._inv_cts[iso_label]
            except KeyError:
                text_label = ""
            task = {
                "text": ent.text,
                "label": text_label,  # human-readable country name
                "word": ent.text,
                "spans": [{
                    "start": start,
                    "end": end,
                }],
                # make sure to rename "features" for Prodigy
                "features": {
                    "maj_vote": iso_label,
                    "word_vec": word_vec,
                    "first_back": first_back,
                    "most_alt": most_alt,
                    "most_pop": most_pop,
                    "ct_mention": ct_mention,
                    "ctm_count1": ctm_count1,
                    "ct_mention2": ct_mention2,
                    "ctm_count2": ctm_count2,
                    "wv_confid": wv_confid,
                    "class_mention": class_mention,  # inferred geonames class from mentions
                    "code_mention": code_mention,
                }
            }
            task_list.append(task)
        except Exception as e:
            # Best effort: an entity that cannot be turned into a task is
            # reported and skipped rather than aborting the whole document.
            print(ent.text, )
            print(e)
    return task_list
class PandABlocksClient:
    """Line-oriented text client for a PandABlocks control socket.

    Requests are queued with `send`, written to the socket by a send loop,
    and answered in order by a receive loop that pushes each response (a
    str, a list of str for multiline responses, or an Exception instance
    for ``ERR`` lines) onto the per-request response queue.
    """

    # Sentinel that tells the send_loop and recv_loop to stop
    STOP = object()

    def __init__(self, hostname="localhost", port=8888, queue_cls=None):
        """Store connection details; nothing is connected until `start`.

        Args:
            hostname (str): Host to connect to
            port (int): TCP port to connect to
            queue_cls: Queue class used for the send and response queues,
                defaults to `queue.Queue`
        """
        if queue_cls is None:
            from queue import Queue as queue_cls
        self.queue_cls = queue_cls
        self.hostname = hostname
        self.port = port
        # Completed lines for a response in progress
        self._completed_response_lines = []
        # True if the current response is multiline
        self._is_multiline = None
        # True when we have been started
        self.started = False
        # Filled in on start
        self._socket = None
        self._send_spawned = None
        self._send_queue = None
        self._recv_spawned = None
        self._response_queues = None
        self._thread_pool = None

    def start(self, spawn=None, socket_cls=None):
        """Connect the socket and spawn the send and receive loops.

        Args:
            spawn: Callable taking a function and returning an object with
                a ``wait()`` method. Defaults to ``apply_async`` of a
                private 2 thread `ThreadPool`
            socket_cls: Zero argument callable producing the socket,
                defaults to `socket.socket`

        Raises:
            ConnectionError: If the socket cannot be connected
        """
        # Check first so a second start() cannot replace (and leak) the
        # thread pool of a client that is already running
        assert not self.started, "Send and recv threads already started"
        if socket_cls is None:
            from socket import socket as socket_cls
        # Holds (message, response_queue) to send next
        self._send_queue = self.queue_cls()
        # Holds response_queue to send next
        self._response_queues = self.queue_cls()
        sock = socket_cls()
        try:
            sock.connect((self.hostname, self.port))
        except OSError as e:
            # Don't leave a half-open socket behind, best effort only
            try:
                sock.close()
            except Exception:  # pylint:disable=broad-except
                pass
            raise ConnectionError(
                f"Can't connect to '{self.hostname}:{self.port}', "
                "did all services on the PandA start correctly?"
            ) from e
        self._socket = sock
        if spawn is None:
            # Only create worker threads once we know we are connected, so
            # a failed connection leaves no threads running
            from multiprocessing.pool import ThreadPool

            self._thread_pool = ThreadPool(2)
            spawn = self._thread_pool.apply_async
        self._send_spawned = spawn(self._send_loop)
        self._recv_spawned = spawn(self._recv_loop)
        self.started = True

    def stop(self):
        """Stop both loops, close the socket and release the thread pool"""
        assert self.started, "Send and recv threads not started"
        self._send_queue.put((self.STOP, None))
        self._send_spawned.wait()
        import socket

        try:
            # Unblocks the recv loop; deliberately best effort as the
            # socket may already have been shut down
            self._socket.shutdown(socket.SHUT_RD)
        except Exception:  # pylint:disable=broad-except
            pass
        self._recv_spawned.wait()
        self._socket.close()
        self._socket = None
        self.started = False
        if self._thread_pool is not None:
            self._thread_pool.close()
            self._thread_pool.join()
            self._thread_pool = None

    def send(self, message):
        """Queue a message for sending

        Args:
            message (str): The message to send

        Returns:
            The queue that the response will be put on
        """
        response_queue = self.queue_cls()
        self._send_queue.put((message, response_queue))
        return response_queue

    def recv(self, response_queue, timeout=10.0):
        """Wait for a response on a queue returned from `send`

        Args:
            response_queue: Queue returned from `send`
            timeout (float): How long to wait for the response

        Returns:
            The response, a str or list of str

        Raises:
            Exception: Whatever exception instance was put on the queue
        """
        response = response_queue.get(timeout=timeout)
        if isinstance(response, Exception):
            raise response
        else:
            return response

    def send_recv(self, message, timeout=10.0):
        """Send a message to a PandABox and wait for the response

        Args:
            message (str): The message to send
            timeout (float): How long to wait before raising queue.Empty

        Returns:
            str: The response
        """
        response_queue = self.send(message)
        response = self.recv(response_queue, timeout)
        return response

    def _send_loop(self):
        """Service self._send_queue, sending requests to server"""
        while True:
            message, response_queue = self._send_queue.get()
            if message is self.STOP:
                break
            try:
                # Register the response queue before sending so the recv
                # loop can always find it when the reply arrives
                self._response_queues.put(response_queue)
                self._socket.sendall(message.encode("utf-8"))
            except Exception:  # pylint:disable=broad-except
                log.exception("Exception sending message %s", message)

    def _get_lines(self):
        """Yield complete lines (newline removed) read from the socket

        Stops when the socket returns no data; a trailing partial line is
        discarded.
        """
        import codecs

        # Decode incrementally so a multi-byte UTF-8 character that is
        # split across two recv() calls does not raise UnicodeDecodeError
        decoder = codecs.getincrementaldecoder("utf-8")()
        buf = ""
        while True:
            lines = buf.split("\n")
            for line in lines[:-1]:
                yield line
            buf = lines[-1]
            # Get something new from the socket
            rx = self._socket.recv(4096)
            if not rx:
                break
            buf += decoder.decode(rx)

    def _respond(self, resp):
        """Respond to the person waiting"""
        response_queue = self._response_queues.get(timeout=0.1)
        response_queue.put(resp)
        self._completed_response_lines = []
        self._is_multiline = None

    def _recv_loop(self):
        """Service socket recv, returning responses to the correct queue"""
        self._completed_response_lines = []
        self._is_multiline = None
        lines_iterator = self._get_lines()
        while True:
            try:
                line = next(lines_iterator)
                if self._is_multiline is None:
                    self._is_multiline = line.startswith("!") or line == "."
                if line.startswith("ERR"):
                    self._respond(ValueError(line))
                elif self._is_multiline:
                    if line == ".":
                        self._respond(self._completed_response_lines)
                    else:
                        # startswith rather than line[0] so an empty line
                        # gives this message instead of an IndexError
                        assert line.startswith(
                            "!"
                        ), f"Multiline response {repr(line)} doesn't start with !"
                        self._completed_response_lines.append(line[1:])
                else:
                    self._respond(line)
            except StopIteration:
                return
            except Exception:
                log.exception("Exception receiving message")
                raise

    def _get_block_numbers(self):
        """Return an OrderedDict of {block_name: number} from *BLOCKS?"""
        block_numbers = OrderedDict()
        for line in self.send_recv("*BLOCKS?\n"):
            block_name, number = line.split()
            block_numbers[block_name] = int(number)
        return block_numbers

    def parameterized_send(self, request, parameter_list):
        """Send batched requests for a list of parameters

        Args:
            request (str): Request to send, like "%s.*?\\n"
            parameter_list (list): parameters to format with, like
                ["TTLIN", "TTLOUT"]

        Returns:
            dict: {parameter: response_queue}
        """
        response_queues = OrderedDict()
        for parameter in parameter_list:
            response_queues[parameter] = self.send(request % parameter)
        return response_queues

    def get_blocks_data(self):
        """Return an OrderedDict of {block_name: BlockData} for all blocks

        Requests are batched: all queries of a kind are sent before any of
        their responses is waited for.
        """
        blocks = OrderedDict()
        # Get details about number of blocks
        block_numbers = self._get_block_numbers()
        block_names = list(block_numbers)
        # Queue up info about each block
        desc_queues = self.parameterized_send("*DESC.%s?\n", block_names)
        field_queues = self.parameterized_send("%s.*?\n", block_names)
        # Create BlockData for each block
        # TODO: we sort here while server gives these in hash table order
        for block_name in sorted(block_names):
            number = block_numbers[block_name]
            description = strip_ok(self.recv(desc_queues[block_name]))
            fields = OrderedDict()
            blocks[block_name] = BlockData(number, description, fields)
            # Parse the field list
            unsorted_fields = {}
            for line in self.recv(field_queues[block_name]):
                split = line.split()
                assert len(split) in (
                    3,
                    4,
                ), f"Expected field_data to have len 3 or 4, got {len(split)}"
                if len(split) == 3:
                    # No subtype given
                    split.append("")
                field_name, index, field_type, field_subtype = split
                unsorted_fields[field_name] = (int(index), field_type, field_subtype)

            # Sort the field list
            def get_field_index(field_name):
                return unsorted_fields[field_name][0]

            field_names = sorted(unsorted_fields, key=get_field_index)
            # Request description for each field
            field_desc_queues = self.parameterized_send(
                "*DESC.%s.%%s?\n" % block_name, field_names
            )
            # Request enum labels for fields that are enums
            enum_fields = []
            for field_name in field_names:
                _, field_type, field_subtype = unsorted_fields[field_name]
                if field_type in ("bit_mux", "pos_mux") or field_subtype == "enum":
                    enum_fields.append(field_name)
                elif field_type == "ext_out":
                    enum_fields.append(field_name + ".CAPTURE")
            enum_queues = self.parameterized_send(
                "*ENUMS.%s.%%s?\n" % block_name, enum_fields
            )
            # Get desc and enum data for each field
            for field_name in field_names:
                _, field_type, field_subtype = unsorted_fields[field_name]
                if field_name in enum_queues:
                    labels = self.recv(enum_queues[field_name])
                elif field_name + ".CAPTURE" in enum_queues:
                    labels = self.recv(enum_queues[field_name + ".CAPTURE"])
                else:
                    labels = []
                description = strip_ok(self.recv(field_desc_queues[field_name]))
                fields[field_name] = FieldData(
                    field_type, field_subtype, description, labels
                )
        return blocks

    def get_pcap_bits_fields(self):
        """Return an OrderedDict of {"PCAP.<field>.CAPTURE": BITS? response}

        Only PCAP fields of type "ext_out" with subtype "bits" are queried.
        """
        # {field_to_set: [bit_names]}
        # E.g. {"PCAP.BITS0"=["TTLIN1.VAL", "TTLIN2.VAL", ...], ...}
        bits_fields = []
        for line in self.send_recv("PCAP.*?\n"):
            split = line.split()
            if len(split) == 4:
                field_name, _, field_type, field_subtype = split
                if field_type == "ext_out" and field_subtype == "bits":
                    bits_fields.append(f"PCAP.{field_name}")
        bits_queues = self.parameterized_send("%s.BITS?\n", sorted(bits_fields))
        bits = OrderedDict()
        for k, queue in bits_queues.items():
            bits[k + ".CAPTURE"] = self.recv(queue)
        return bits

    def get_changes(self, include_errors=False):
        """Yield (field, value) for each line of the *CHANGES? response

        Args:
            include_errors (bool): If True, fields reported with "(error)"
                are yielded with the `Exception` class as their value,
                otherwise they are skipped

        Table fields (lines ending "<") are first yielded with value None,
        then yielded again at the end with their contents.
        """
        table_queues = {}
        for line in self.send_recv("*CHANGES?\n"):
            if "=" in line:
                field, val = line.split("=", 1)
            elif line.endswith("<"):
                # table
                field = line[:-1]
                val = None
                table_queues[field] = self.send(f"{field}?\n")
            elif line.endswith("(error)"):
                if include_errors:
                    field = line.split(" ", 1)[0]
                    val = Exception
                else:
                    continue
            else:
                log.warning("Can't parse line %r of changes", line)
                continue
            yield field, val
        for field, q in table_queues.items():
            yield field, self.recv(q)

    def get_table_fields(self, block, field):
        """Return an OrderedDict of {name: TableFieldData} for a table field

        Args:
            block (str): Block name
            field (str): Name of the table field within the block
        """
        fields = OrderedDict()
        enum_queues = {}
        for line in self.send_recv(f"{block}.{field}.FIELDS?\n"):
            split = line.split()
            name = split[1].strip()
            signed = False
            if len(split) > 2:
                # Field is an enum, get its values
                if split[2] == "enum":
                    enum_queues[name] = self.send(f"*ENUMS.{block}.{field}[].{name}?\n")
                elif split[2] == "int":
                    signed = True
            fields[name] = (split[0], signed)
        # Request description for each field
        desc_queues = self.parameterized_send(
            "*DESC.%s.%s[].%%s?\n" % (block, field), list(fields)
        )
        # Replace each (bits_str, signed) placeholder with the final data;
        # only values are reassigned so iterating items() is safe
        for name, (bits_str, signed) in fields.items():
            bits_hi, bits_lo = [int(x) for x in bits_str.split(":")]
            description = strip_ok(self.recv(desc_queues[name]))
            if name in enum_queues:
                labels = self.recv(enum_queues[name])
            else:
                labels = None
            fields[name] = TableFieldData(bits_hi, bits_lo, description, labels, signed)
        return fields

    def get_field(self, block, field):
        """Return the value of block.field with the OK prefix stripped

        Raises:
            ValueError: If the server responds with an error
        """
        try:
            resp = self.send_recv(f"{block}.{field}?\n")
        except ValueError as e:
            raise ValueError(f"Error getting {block}.{field}: {e}")
        else:
            return strip_ok(resp)

    def set_field(self, block, field, value):
        """Set block.field to value, see `set_fields`"""
        self.set_fields({f"{block}.{field}": value})

    def set_fields(self, field_values):
        """Set a number of fields, sending all requests before waiting

        Args:
            field_values (dict): {"BLOCK.FIELD": value}

        Raises:
            ValueError: If the server responds with an error for any field
        """
        queues = OrderedDict()
        for field, value in field_values.items():
            message = f"{field}={value}\n"
            queues[(field, value)] = self.send(message)
        for (field, value), queue in queues.items():
            try:
                resp = self.recv(queue)
            except ValueError as e:
                raise ValueError(f"Error setting {field} to {value!r}: {e}")
            else:
                assert resp == "OK", f"Expected OK, got {resp!r}"

    def set_table(self, block, field, int_values):
        """Write int_values to the table block.field, one value per line"""
        lines = [f"{block}.{field}<\n"]
        lines += [f"{int_value}\n" for int_value in int_values]
        # A blank line terminates the table
        lines += ["\n"]
        resp = self.send_recv("".join(lines))
        assert resp == "OK", f"Expected OK, got {resp!r}"
def run_command(key):
    """
    Run one simulated task: sleep for the processing phase, then run the
    write phase on a thread pool and time it.

    `key` is a dict shaped like the entries built by the caller:

    keylist.append({'taskId': i,
                    'job_number': job_number,
                    'total_input': numTasks,
                    'write_element_size': write_element_size,
                    'process_time': process_time,
                    'total_time': total_time})

    Returns the tuple
    (begin_of_function, end_of_function, read_time, work_time, write_time)
    where the first two are `time.time()` stamps, `read_time` and
    `work_time` are always 0, and `write_time` is the time spent waiting
    for the write workers after they were submitted.
    """
    begin_of_function = time.time()
    logger = logging.getLogger(__name__)
    logger.info("taskId = " + str(key['taskId']))
    taskId = key['taskId']
    jobid_int = int(key['job_number'])
    write_element_size = int(key['write_element_size'])
    process_time = int(key['process_time'])
    total_time = int(key['total_time'])

    read_time = work_time = write_time = 0

    # number of concurrent writer threads
    number_of_clients = 1

    def write_work_client(writer_key):
        # Loops for `total_time` seconds, logging a start ("S") and end
        # ("E") record per simulated write of `write_element_size` bytes.
        start_time = time.time()
        taskID = writer_key['taskId']
        jobID = writer_key['jobid']
        datasize = writer_key['write_element_size']
        total_time = writer_key['total_time']
        logging.info(total_time)
        body = b'a' * datasize
        count = 0
        while time.time() < start_time + total_time:
            count = count + 1
            # Key name for the storage put; the put itself is currently
            # disabled, but the name derivation is kept so the per-write
            # work done by the loop is unchanged.
            keyname = str(taskID) + "-" + str(count)
            m = hashlib.md5()
            m.update(keyname.encode('utf-8'))
            randomized_keyname = str(taskID) + '-' + m.hexdigest()[:8] + '-' + str(count)
            logger.info("[POCKET] [" + str(jobID) + "] " + str(time.time_ns()) + " " + str(taskID) + " " + str(len(body)) + " write " + "S")
            logger.info("[POCKET] [" + str(jobID) + "] " + str(time.time_ns()) + " " + str(taskID) + " " + str(len(body)) + " write " + "E ")
        logger.info("Write finish here: " + str(time.time()))

    write_pool = ThreadPool(number_of_clients)
    try:
        time.sleep(process_time)
        logger.info("Process finish here: " + str(time.time()))

        writer_keylist = [
            {'client_id': i,
             'taskId': taskId,
             'jobid': jobid_int,
             'write_element_size': write_element_size,
             'total_time': total_time}
            for i in range(number_of_clients)
        ]
        write_pool_handler = write_pool.map_async(write_work_client, writer_keylist)
        start_time = time.time()
        # wait() (not get()) is kept on purpose: an exception raised in a
        # writer is not propagated to the caller.
        write_pool_handler.wait()
        write_time = time.time() - start_time
    finally:
        # Release the worker threads even if the sleep or submission fails.
        write_pool.close()
        write_pool.join()

    end_of_function = time.time()
    return begin_of_function, end_of_function, read_time, work_time, write_time
class ApiClient(object):
    """
    Generic API client for Swagger client library builds.

    Swagger generic API client. This client handles the client-
    server communication, and is invariant across implementations. Specifics of
    the methods and models for each application are generated from the Swagger
    templates.

    NOTE: This class is auto generated by the swagger code generator program.
    Ref: https://github.com/swagger-api/swagger-codegen
    Sections marked "Change" below are local modifications of the generated
    code.

    :param configuration: .Configuration object for this client
    :param header_name: a header to pass when making calls to the API.
    :param header_value: a header value to pass when making calls to the API.
    :param cookie: a cookie to include in the header when making calls to the API
    """

    PRIMITIVE_TYPES = (float, bool, bytes, text_type) + integer_types
    NATIVE_TYPES_MAPPING = {
        'int': int,
        'long': int if PY3 else long,
        'float': float,
        'str': str,
        'bool': bool,
        'date': date,
        'datetime': datetime,
        'object': object,
    }

    def __init__(self, configuration=None, header_name=None, header_value=None,
                 cookie=None):
        if configuration is None:
            configuration = Configuration()
        self.configuration = configuration

        self.pool = ThreadPool()
        self.rest_client = RESTClientObject(configuration)
        self.default_headers = {}
        if header_name is not None:
            self.default_headers[header_name] = header_value
        self.cookie = cookie
        # Set default User-Agent.
        self.user_agent = 'Swagger-Codegen/1.0.0/python'

        ########### Change
        # Store last api call metadata
        self.last_metadata = {}
        ########### End Change

    def __del__(self):
        # __init__ may have raised before the pool was created, in which
        # case there is nothing to shut down.
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.close()
            pool.join()

    @property
    def user_agent(self):
        """User agent for this API client"""
        return self.default_headers['User-Agent']

    @user_agent.setter
    def user_agent(self, value):
        self.default_headers['User-Agent'] = value

    def set_default_header(self, header_name, header_value):
        """Sets a header that is sent with every request."""
        self.default_headers[header_name] = header_value

    ########### Change
    def metadata_wrapper(fn):
        """Save metadata of last api call.

        Expects the wrapped method to be called with the resource path and
        the HTTP method as its first two positional arguments.
        """
        @functools.wraps(fn)
        def wrapped_f(self, *args, **kwargs):
            self.last_metadata = {}
            self.last_metadata["url"] = self.configuration.host + args[0]
            self.last_metadata["method"] = args[1]
            self.last_metadata["timestamp"] = time.time()
            try:
                return fn(self, *args, **kwargs)
            except Exception as e:
                self.last_metadata["exception"] = e
                raise
        return wrapped_f

    def get_last_metadata(self):
        """Returns the metadata dict recorded for the last api call."""
        return self.last_metadata
    ########### End Change

    @metadata_wrapper
    def __call_api(self, resource_path, method,
                   path_params=None, query_params=None, header_params=None,
                   body=None, post_params=None, files=None,
                   response_type=None, auth_settings=None,
                   _return_http_data_only=None, collection_formats=None,
                   _preload_content=True, _request_timeout=None):

        config = self.configuration

        # header parameters
        header_params = header_params or {}
        header_params.update(self.default_headers)
        if self.cookie:
            header_params['Cookie'] = self.cookie
        if header_params:
            header_params = self.sanitize_for_serialization(header_params)
            header_params = dict(
                self.parameters_to_tuples(header_params, collection_formats))

        # path parameters
        if path_params:
            path_params = self.sanitize_for_serialization(path_params)
            path_params = self.parameters_to_tuples(path_params,
                                                    collection_formats)
            for k, v in path_params:
                # specified safe chars, encode everything
                resource_path = resource_path.replace(
                    '{%s}' % k,
                    quote(str(v), safe=config.safe_chars_for_path_param))

        # query parameters
        if query_params:
            query_params = self.sanitize_for_serialization(query_params)
            query_params = self.parameters_to_tuples(query_params,
                                                     collection_formats)

        # post parameters
        if post_params or files:
            post_params = self.prepare_post_parameters(post_params, files)
            post_params = self.sanitize_for_serialization(post_params)
            post_params = self.parameters_to_tuples(post_params,
                                                    collection_formats)

        # auth setting
        self.update_params_for_auth(header_params, query_params, auth_settings)

        # body
        if body:
            body = self.sanitize_for_serialization(body)

        # request url
        url = self.configuration.host + resource_path

        # perform request and return response
        response_data = self.request(method, url,
                                     query_params=query_params,
                                     headers=header_params,
                                     post_params=post_params, body=body,
                                     _preload_content=_preload_content,
                                     _request_timeout=_request_timeout)

        self.last_response = response_data

        return_data = response_data
        if _preload_content:
            # deserialize response data
            if response_type:
                return_data = self.deserialize(response_data, response_type)
            else:
                return_data = None

        ########### Change
        self.last_metadata["response"] = response_data
        self.last_metadata["return_data"] = return_data
        ########### End Change

        if _return_http_data_only:
            return (return_data)
        else:
            return (return_data, response_data.status,
                    response_data.getheaders())

    def sanitize_for_serialization(self, obj):
        """
        Builds a JSON POST object.

        If obj is None, return None.
        If obj is str, int, long, float, bool, return directly.
        If obj is datetime.datetime, datetime.date
            convert to string in iso8601 format.
        If obj is list, sanitize each element in the list.
        If obj is tuple, sanitize each element and return a tuple.
        If obj is dict, return the dict.
        If obj is swagger model, return the properties dict.

        :param obj: The data to serialize.
        :return: The serialized form of data.
        """
        if obj is None:
            return None
        elif isinstance(obj, self.PRIMITIVE_TYPES):
            return obj
        elif isinstance(obj, list):
            return [
                self.sanitize_for_serialization(sub_obj) for sub_obj in obj
            ]
        elif isinstance(obj, tuple):
            return tuple(
                self.sanitize_for_serialization(sub_obj) for sub_obj in obj)
        elif isinstance(obj, (datetime, date)):
            return obj.isoformat()

        if isinstance(obj, dict):
            obj_dict = obj
        else:
            # Convert model obj to dict except
            # attributes `swagger_types`, `attribute_map`
            # and attributes which value is not None.
            # Convert attribute name to json key in
            # model definition for request.
            obj_dict = {
                obj.attribute_map[attr]: getattr(obj, attr)
                for attr, _ in iteritems(obj.swagger_types)
                if getattr(obj, attr) is not None
            }

        return {
            key: self.sanitize_for_serialization(val)
            for key, val in iteritems(obj_dict)
        }

    def deserialize(self, response, response_type):
        """
        Deserializes response into an object.

        :param response: RESTResponse object to be deserialized.
        :param response_type: class literal for
            deserialized object, or string of class name.

        :return: deserialized object.
        """
        # handle file downloading
        # save response body into a tmp file and return the instance
        if response_type == "file":
            return self.__deserialize_file(response)

        # fetch data from response object
        try:
            data = json.loads(response.data)
        except ValueError:
            # not JSON: hand the raw payload to the deserializer
            data = response.data

        return self.__deserialize(data, response_type)

    def __deserialize(self, data, klass):
        """
        Deserializes dict, list, str into an object.

        :param data: dict, list or str.
        :param klass: class literal, or string of class name.

        :return: object.
        """
        if data is None:
            return None

        if isinstance(klass, str):
            if klass.startswith('list['):
                sub_kls = re.match(r'list\[(.*)\]', klass).group(1)
                return [
                    self.__deserialize(sub_data, sub_kls) for sub_data in data
                ]

            if klass.startswith('dict('):
                sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2)
                return {
                    k: self.__deserialize(v, sub_kls)
                    for k, v in iteritems(data)
                }

            # convert str to class
            if klass in self.NATIVE_TYPES_MAPPING:
                klass = self.NATIVE_TYPES_MAPPING[klass]
            else:
                klass = getattr(models, klass)

        if klass in self.PRIMITIVE_TYPES:
            return self.__deserialize_primitive(data, klass)
        elif klass == object:
            return self.__deserialize_object(data)
        elif klass == date:
            return self.__deserialize_date(data)
        elif klass == datetime:
            return self.__deserialize_datatime(data)
        else:
            return self.__deserialize_model(data, klass)

    def call_api(self, resource_path, method,
                 path_params=None, query_params=None, header_params=None,
                 body=None, post_params=None, files=None,
                 response_type=None, auth_settings=None, asynchronous=None,
                 _return_http_data_only=None, collection_formats=None,
                 _preload_content=True, _request_timeout=None):
        """
        Makes the HTTP request (synchronous) and return the deserialized data.
        To make an async request, set the asynchronous parameter.

        :param resource_path: Path to method endpoint.
        :param method: Method to call.
        :param path_params: Path parameters in the url.
        :param query_params: Query parameters in the url.
        :param header_params: Header parameters to be
            placed in the request header.
        :param body: Request body.
        :param post_params dict: Request post form parameters,
            for `application/x-www-form-urlencoded`, `multipart/form-data`.
        :param auth_settings list: Auth Settings names for the request.
        :param response_type: Response data type.
        :param files dict: key -> filename, value -> filepath,
            for `multipart/form-data`.
        :param asynchronous bool: execute request asynchronously
        :param _return_http_data_only: response data without head status code
            and headers
        :param collection_formats: dict of collection formats for path, query,
            header, and post parameters.
        :param _preload_content: if False, the urllib3.HTTPResponse object will
            be returned without reading/decoding response data.
            Default is True.
        :param _request_timeout: timeout setting for this request. If one
            number provided, it will be total request timeout. It can also be
            a pair (tuple) of (connection, read) timeouts.
        :return:
            If asynchronous parameter is True,
            the request will be called asynchronously.
            The method will return the request thread.
            If parameter asynchronous is False or missing,
            then the method will return the response directly.
        """
        if not asynchronous:
            return self.__call_api(resource_path, method,
                                   path_params, query_params, header_params,
                                   body, post_params, files,
                                   response_type, auth_settings,
                                   _return_http_data_only, collection_formats,
                                   _preload_content, _request_timeout)
        else:
            thread = self.pool.apply_async(
                self.__call_api,
                (resource_path, method, path_params, query_params,
                 header_params, body, post_params, files,
                 response_type, auth_settings,
                 _return_http_data_only, collection_formats,
                 _preload_content, _request_timeout))
        return thread

    def request(self, method, url, query_params=None, headers=None,
                post_params=None, body=None, _preload_content=True,
                _request_timeout=None):
        """
        Makes the HTTP request using RESTClient.

        :raises ValueError: if `method` is not a supported HTTP method.
        """
        if method == "GET":
            return self.rest_client.GET(url,
                                        query_params=query_params,
                                        _preload_content=_preload_content,
                                        _request_timeout=_request_timeout,
                                        headers=headers)
        elif method == "HEAD":
            return self.rest_client.HEAD(url,
                                         query_params=query_params,
                                         _preload_content=_preload_content,
                                         _request_timeout=_request_timeout,
                                         headers=headers)
        elif method == "OPTIONS":
            return self.rest_client.OPTIONS(url,
                                            query_params=query_params,
                                            headers=headers,
                                            post_params=post_params,
                                            _preload_content=_preload_content,
                                            _request_timeout=_request_timeout,
                                            body=body)
        elif method == "POST":
            return self.rest_client.POST(url,
                                         query_params=query_params,
                                         headers=headers,
                                         post_params=post_params,
                                         _preload_content=_preload_content,
                                         _request_timeout=_request_timeout,
                                         body=body)
        elif method == "PUT":
            return self.rest_client.PUT(url,
                                        query_params=query_params,
                                        headers=headers,
                                        post_params=post_params,
                                        _preload_content=_preload_content,
                                        _request_timeout=_request_timeout,
                                        body=body)
        elif method == "PATCH":
            return self.rest_client.PATCH(url,
                                          query_params=query_params,
                                          headers=headers,
                                          post_params=post_params,
                                          _preload_content=_preload_content,
                                          _request_timeout=_request_timeout,
                                          body=body)
        elif method == "DELETE":
            return self.rest_client.DELETE(url,
                                           query_params=query_params,
                                           headers=headers,
                                           _preload_content=_preload_content,
                                           _request_timeout=_request_timeout,
                                           body=body)
        else:
            raise ValueError("http method must be `GET`, `HEAD`, `OPTIONS`,"
                             " `POST`, `PATCH`, `PUT` or `DELETE`.")

    def parameters_to_tuples(self, params, collection_formats):
        """
        Get parameters as list of tuples, formatting collections.

        :param params: Parameters as dict or list of two-tuples
        :param dict collection_formats: Parameter collection formats
        :return: Parameters as list of tuples, collections formatted
        """
        new_params = []
        if collection_formats is None:
            collection_formats = {}
        for k, v in iteritems(params) if isinstance(params, dict) else params:
            if k in collection_formats:
                collection_format = collection_formats[k]
                if collection_format == 'multi':
                    new_params.extend((k, value) for value in v)
                else:
                    if collection_format == 'ssv':
                        delimiter = ' '
                    elif collection_format == 'tsv':
                        delimiter = '\t'
                    elif collection_format == 'pipes':
                        delimiter = '|'
                    else:  # csv is the default
                        delimiter = ','
                    new_params.append(
                        (k, delimiter.join(str(value) for value in v)))
            else:
                new_params.append((k, v))
        return new_params

    ########### Change
    def prepare_post_parameters(self, post_params=None, files=None):
        """
        Builds form parameters.

        Each file value may be an open file object (anything with `read()`
        and `name`) or a file path; a list of either is also accepted.

        :param post_params: Normal form parameters.
        :param files: File parameters.
        :return: Form parameters with files.
        """
        params = post_params or []
        for key, values in (files or {}).items():
            for maybe_file_or_path in values if isinstance(values, list) else [values]:
                try:
                    # use the parameter as if it was an open file object
                    data = maybe_file_or_path.read()
                    maybe_file_or_path = maybe_file_or_path.name
                except AttributeError:
                    # then it is presumably a file path
                    with open(maybe_file_or_path, 'rb') as fh:
                        data = fh.read()
                basepath = os.path.basename(maybe_file_or_path)
                mimetype = mimetypes.guess_type(
                    basepath)[0] or 'application/octet-stream'
                params.append((key, (basepath, data, mimetype)))
        return params
    ########### End Change

    def select_header_accept(self, accepts):
        """
        Returns `Accept` based on an array of accepts provided.

        :param accepts: List of headers.
        :return: Accept (e.g. application/json), or None if `accepts`
            is empty.
        """
        if not accepts:
            return

        accepts = [x.lower() for x in accepts]

        if 'application/json' in accepts:
            return 'application/json'
        else:
            return ', '.join(accepts)

    def select_header_content_type(self, content_types):
        """
        Returns `Content-Type` based on an array of content_types provided.

        :param content_types: List of content-types.
        :return: Content-Type (e.g. application/json).
        """
        if not content_types:
            return 'application/json'

        content_types = [x.lower() for x in content_types]

        if 'application/json' in content_types or '*/*' in content_types:
            return 'application/json'
        else:
            return content_types[0]

    def update_params_for_auth(self, headers, querys, auth_settings):
        """
        Updates header and query params based on authentication setting.

        :param headers: Header parameters dict to be updated.
        :param querys: Query parameters tuple list to be updated.
        :param auth_settings: Authentication setting identifiers list.
        :raises ValueError: if a setting is neither in `query` nor `header`.
        """
        if not auth_settings:
            return

        for auth in auth_settings:
            auth_setting = self.configuration.auth_settings().get(auth)
            if auth_setting:
                if not auth_setting['value']:
                    continue
                elif auth_setting['in'] == 'header':
                    headers[auth_setting['key']] = auth_setting['value']
                elif auth_setting['in'] == 'query':
                    querys.append((auth_setting['key'], auth_setting['value']))
                else:
                    raise ValueError(
                        'Authentication token must be in `query` or `header`')

    def __deserialize_file(self, response):
        """
        Saves response body into a file in a temporary folder,
        using the filename from the `Content-Disposition` header if provided.

        :param response:  RESTResponse.
        :return: file path.
        """
        fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path)
        os.close(fd)
        os.remove(path)

        content_disposition = response.getheader("Content-Disposition")
        if content_disposition:
            match = re.search(r'filename=[\'"]?([^\'"\s]+)[\'"]?',
                              content_disposition)
            # Keep the generated temp name when the header carries no
            # usable filename. basename() stops a server supplied name
            # such as "../../x" from escaping the temp folder.
            filename = os.path.basename(match.group(1)) if match else ''
            if filename:
                path = os.path.join(os.path.dirname(path), filename)

        data = response.data
        if isinstance(data, text_type):
            data = data.encode('utf-8')
        # The payload is bytes: writing it to a text mode file raises
        # TypeError on Python 3 and could mangle newlines.
        with open(path, "wb") as f:
            f.write(data)

        return path

    def __deserialize_primitive(self, data, klass):
        """
        Deserializes string to primitive type.

        :param data: str.
        :param klass: class literal.

        :return: int, long, float, str, bool.
        """
        try:
            return klass(data)
        except UnicodeEncodeError:
            # text_type is `unicode` on Python 2 and `str` on Python 3
            return text_type(data)
        except TypeError:
            return data

    def __deserialize_object(self, value):
        """
        Return a original value.

        :return: object.
        """
        return value

    def __deserialize_date(self, string):
        """
        Deserializes string to date.

        :param string: str.
        :return: date, or the input string if dateutil is not installed.
        """
        try:
            from dateutil.parser import parse
            return parse(string).date()
        except ImportError:
            return string
        except ValueError:
            raise ApiException(
                status=0,
                reason="Failed to parse `{0}` into a date object".format(
                    string))

    def __deserialize_datatime(self, string):
        """
        Deserializes string to datetime.

        The string should be in iso8601 datetime format.

        :param string: str.
        :return: datetime, or the input string if dateutil is not installed.
        """
        try:
            from dateutil.parser import parse
            return parse(string)
        except ImportError:
            return string
        except ValueError:
            raise ApiException(
                status=0,
                reason=("Failed to parse `{0}` into a datetime object".format(
                    string)))

    def __deserialize_model(self, data, klass):
        """
        Deserializes list or dict to model.

        :param data: dict, list.
        :param klass: class literal.
        :return: model object.
        """
        if not klass.swagger_types and not hasattr(klass,
                                                   'get_real_child_model'):
            return data

        kwargs = {}
        if klass.swagger_types is not None:
            for attr, attr_type in iteritems(klass.swagger_types):
                if data is not None \
                        and klass.attribute_map[attr] in data \
                        and isinstance(data, (list, dict)):
                    value = data[klass.attribute_map[attr]]
                    kwargs[attr] = self.__deserialize(value, attr_type)

        instance = klass(**kwargs)

        if hasattr(instance, 'get_real_child_model'):
            # polymorphic model: let the instance pick the concrete class
            klass_name = instance.get_real_child_model(data)
            if klass_name:
                instance = self.__deserialize(data, klass_name)
        return instance
def read_async(iterable, urlkey=None, max_workers=None, blocksize=1024 * 1024, decode=None,
               raise_http_err=True, timeout=None, unordered=True, openers=None,
               **kwargs):  # pylint:disable=too-many-arguments
    """
    Wrapper around `multiprocessing.pool.ThreadPool()` for downloading data from urls in
    `iterable` asynchronously. Each download is executed on a separate *worker thread*,
    yielding the result of each `url` read.

    Yields the tuple:
    ```
        obj, result, exc, url
    ```
    where:

      - `obj` is the element of `iterable` which originated the `urlread` call
      - `result` is the result of `urlread`, it is None in case of errors (see `exc` below).
        Otherwise, it is the tuple
        ```(data, status_code, message)```
         where:
         * `data` is the data read (as bytes or string if `decode != None`). It can be
           None when `raise_http_err=False` and an HTTPException occurred
         * `status_code` is the integer denoting the status code (e.g. 200), and
         * `message` the string denoting the status message (e.g., 'OK').
      - `exc` is the exception raised by `urlread`, if any. **Either `result` or `exc` are
        None, but not both**. Only exceptions wrapped by `urlread` into a `URLException`
        are yielded here (their `exc` attribute); any other exception is raised and stops
        the download
      - `url` is the original url (either string or Request object). If `iterable` is an
        iterable of `Request` objects or url strings, then `url` is equal to `obj`

    Note that if `raise_http_err=False` then `HTTPError`s are treated as 'normal'
    response and will be yielded in `result` as a tuple where `data=None` and
    `status_code` is most likely greater or equal to 400.

    :param iterable: an iterable of objects representing the urls addresses to be read:
        if its elements are neither strings nor `Request` objects, the `urlkey` argument
        (see below) must be specified to map each element to a valid url string or Request
    :param urlkey: function or None. When None (the default), all elements of `iterable`
        must be url strings or Request objects. When function, it will be called with each
        element of `iterable` as argument, and must return the mapped url address or Request.
    :param max_workers: integer or None (the default) denoting the number of worker
        threads of the `ThreadPool`. When None, the pool picks its own default size
    :param blocksize: integer defaulting to 1024*1024, forwarded to `urlread` as the
        maximum number of bytes to be read at each read call
    :param decode: string or None (default: None), forwarded to `urlread`: None means
        return the byte string as it was read, otherwise the name of the decoding to
        apply (e.g. 'utf8')
    :param raise_http_err: boolean (True by default) tells whether `HTTPError`s should be
        yielded as exceptions or not. When False, `HTTPError`s are yielded as normal
        responses in `result` as the tuple `(None, status_code, message)`
    :param timeout: timeout in seconds forwarded to `urlread` (None: use its default)
    :param unordered: boolean (default True). When True, results are yielded as soon as
        they are available (`ThreadPool.imap_unordered`) and are NOT guaranteed to follow
        the order of `iterable`. When False, results are yielded in the same order as
        `iterable` (`ThreadPool.imap`)
    :param openers: a function behaving like `urlkey`: called with each element of
        `iterable`, it should return the opener to use for that element. When None, no
        opener is passed to `urlread`
    :param kwargs: optional arguments forwarded to `urlread`

    Notes:
    ======

    Cancelling
    ----------
    If anything is raised while results are consumed (including `KeyboardInterrupt`, or
    `GeneratorExit` when the caller stops iterating early) an internal flag is set which
    makes every not-yet-started task return immediately instead of downloading, so that
    the pool drains within seconds rather than after all downloads. The pool is closed
    in every case, so its worker threads are released also on errors.
    """
    # flag for CTRL-C or cancelled tasks: read by the worker closure below
    kill = False

    def urlwrapper(obj):
        """Download a single element; returns None when the download was cancelled."""
        if kill:
            return None
        url = urlkey(obj) if urlkey is not None else obj
        opener = openers(obj) if openers is not None else None
        try:
            return obj, \
                urlread(url, blocksize, decode, True, raise_http_err, timeout, opener,
                        **kwargs), \
                None, url
        except URLException as urlexc:
            return obj, None, urlexc.exc, url

    tpool = ThreadPool(max_workers)
    # chunksize is deliberately left to its default (1): larger chunks seemed to slow
    # down downloads when using threads
    threadpoolmap = tpool.imap_unordered if unordered else tpool.imap
    try:
        for result_tuple in threadpoolmap(urlwrapper, iterable):
            if kill:
                continue  # (for safety: we should never enter here)
            yield result_tuple
    except BaseException:
        # Deliberately as broad as a bare except: KeyboardInterrupt and GeneratorExit
        # must also flip the flag, so that pending tasks quit as soon as possible.
        kill = True
        raise
    finally:
        # previously reached only on normal exhaustion: close the pool on every exit
        # path so that worker threads terminate once the queue is drained
        tpool.close()
def new_order(session, w_id, d_id, c_id, num_items, item_number,
              supplier_warehouse, quantity):
    """Create a new order with its order lines and return a summary of it.

    The order id is the sum of the district offset (`D_O_ID_OFST`) and the district
    counter (`D_O_COUNTER`); the counter is then incremented. Each of the `num_items`
    items is processed on a pool of 8 threads (stock update, stock counters, order
    line, item_orders row). The `orders` row is written only after all order lines.

    Args:
        session: database session, used through `utils.single_select` /
            `utils.do_query` and `session.prepare`.
        w_id: warehouse id of the order.
        d_id: district id of the order.
        c_id: customer id of the order.
        num_items: number of items in the order.
        item_number: indexable sequence with the id of each item.
        supplier_warehouse: indexable sequence with the supplying warehouse id
            of each item.
        quantity: indexable sequence with the ordered quantity of each item.

    Returns:
        dict with keys `w_id`, `d_id`, `c_id`, `w_tax`, `d_tax`, `o_id`,
        `o_entry_d`, `num_items`, `total_amount` and `item_infos` (one tuple
        `(item id, item name, supplier warehouse, quantity, amount, adjusted
        stock quantity)` per item), plus `c_last`, `c_credit`, `c_discount` when
        the customer row is found.
    """
    # Step 1
    n = utils.single_select(
        session,
        'SELECT D_O_ID_OFST from district WHERE D_W_ID = %s AND D_ID = %s',
        (w_id, d_id))
    n += utils.single_select(
        session,
        'SELECT D_O_COUNTER from district_counters WHERE D_W_ID = %s AND D_ID = %s',
        (w_id, d_id))

    # Step 2
    utils.do_query(
        session,
        'UPDATE district_counters SET D_O_COUNTER = D_O_COUNTER + 1 WHERE D_W_ID = %s AND D_ID = %s',
        (w_id, d_id))

    # Step 3
    all_local = int(all(supplier_warehouse[i] == w_id for i in range(num_items)))
    current_datetime = datetime.now()

    # Step 4 & 5
    item_amount = [0] * num_items
    adjusted_qty = [0] * num_items

    cql_insert_item_orders = session.prepare(
        "INSERT INTO item_orders (W_ID, I_ID, O_ID, D_ID, C_ID) VALUES (?, ?, ?, ?, ?)"
    )

    def handle_item(i):
        """Process item `i`; writes only to slot `i` of the shared lists."""
        # Step 5a
        s_quantity = utils.single_select(
            session,
            'SELECT S_QUANTITY FROM stock WHERE S_W_ID = %s AND S_I_ID = %s',
            (supplier_warehouse[i], item_number[i]))

        # Step 5b
        adjusted_qty[i] = s_quantity - quantity[i]

        # Step 5c
        if adjusted_qty[i] < 10:
            adjusted_qty[i] += 100

        # Step 5d
        utils.do_query(
            session,
            'UPDATE stock SET S_QUANTITY = %s WHERE S_W_ID = %s AND S_I_ID = %s',
            (adjusted_qty[i], supplier_warehouse[i], item_number[i]))
        utils.do_query(
            session,
            ''' UPDATE stock_counters SET S_YTD_CHANGE = S_YTD_CHANGE + %s, S_ORDER_CNT_CHANGE = S_ORDER_CNT_CHANGE + 1 WHERE S_W_ID = %s AND S_I_ID = %s ''',
            (quantity[i], supplier_warehouse[i], item_number[i]))
        if supplier_warehouse[i] != w_id:
            utils.do_query(
                session,
                ''' UPDATE stock_counters SET S_REMOTE_CNT_CHANGE = S_REMOTE_CNT_CHANGE + 1 WHERE S_W_ID = %s AND S_I_ID = %s ''',
                (supplier_warehouse[i], item_number[i]))

        # Step 5e
        i_price = utils.single_select(
            session, 'SELECT I_PRICE FROM item WHERE I_ID = %s',
            (item_number[i], ))
        item_amount[i] = quantity[i] * i_price

        # Step 5f (the total) is computed after the pool has finished, see below

        # Step 5g
        # The column name cannot be a bound parameter, hence the format(); the
        # values are bound like in every other query of this function.
        dist_name = 'S_DIST_' + str(d_id)
        dist_info = utils.single_select(
            session,
            'SELECT {} FROM stock WHERE S_W_ID = %s AND S_I_ID = %s'.format(
                dist_name),
            (supplier_warehouse[i], item_number[i]))
        utils.do_query(
            session,
            ''' INSERT INTO order_line (OL_O_ID, OL_D_ID, OL_W_ID, OL_NUMBER, OL_I_ID, OL_SUPPLY_W_ID, OL_QUANTITY, OL_AMOUNT, OL_DIST_INFO) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) ''',
            (n, d_id, w_id, i, item_number[i], supplier_warehouse[i],
             quantity[i], item_amount[i], dist_info))

        # Populate the item_orders table for each item-order pair
        utils.do_query(session,
                       cql_insert_item_orders,
                       (w_id, item_number[i], n, d_id, c_id),
                       query_type='write')

    pool = ThreadPool(8)
    try:
        pool.map(handle_item, range(num_items))
    finally:
        # release the worker threads also when an item fails
        pool.close()
        pool.join()

    # Step 5f: summed once here instead of `total += ...` inside the workers, which
    # was an unsynchronised read-modify-write shared by 8 threads (lost updates).
    total_amount = sum(item_amount)

    # Create order after creating all order-lines, so that when querying for
    # popular items there will be no error. The entry date stored is the same
    # value reported in the output.
    utils.do_query(
        session,
        ''' INSERT INTO orders (O_ID, O_D_ID, O_W_ID, O_C_ID, O_ENTRY_D, O_OL_CNT, O_ALL_LOCAL) VALUES (%s, %s, %s, %s, %s, %s, %s) ''',
        (n, d_id, w_id, c_id, current_datetime, num_items, all_local))

    # Step 6
    w_tax = utils.single_select(session,
                                'SELECT W_TAX FROM warehouse WHERE W_ID = %s',
                                (w_id, ))
    d_tax = utils.single_select(
        session, 'SELECT D_TAX FROM district WHERE D_W_ID = %s AND D_ID = %s',
        (w_id, d_id))
    c_discount = utils.single_select(
        session,
        'SELECT C_DISCOUNT FROM customer WHERE C_W_ID = %s AND C_D_ID = %s AND C_ID = %s',
        (w_id, d_id, c_id))
    total_amount *= (1 + d_tax + w_tax) * (1 - c_discount)

    # Output
    output = {'w_id': w_id, 'd_id': d_id, 'c_id': c_id}
    rows = utils.do_query(
        session,
        'SELECT C_LAST, C_CREDIT, C_DISCOUNT FROM customer WHERE C_W_ID = %s AND C_D_ID = %s AND C_ID = %s',
        (w_id, d_id, c_id))
    for row in rows:
        # only the first row is used
        output['c_last'] = row.c_last
        output['c_credit'] = row.c_credit
        output['c_discount'] = row.c_discount
        break
    output['w_tax'] = w_tax
    output['d_tax'] = d_tax
    output['o_id'] = n
    output['o_entry_d'] = current_datetime
    output['num_items'] = num_items
    output['total_amount'] = total_amount
    output['item_infos'] = []
    for i in range(num_items):
        i_name = utils.single_select(
            session, 'SELECT I_NAME FROM item WHERE I_ID = %s',
            (item_number[i], ))
        output['item_infos'].append(
            (item_number[i], i_name, supplier_warehouse[i], quantity[i],
             item_amount[i], adjusted_qty[i]))
    return output
my_dandelion = Dandelion() my_dandelion.doi = my_dict["doi"] my_dandelion.num_events = my_dict["num_events"] db.session.add(my_dandelion) # but don't append it; it doesn't need to get run safe_commit(db) print "now calling dandelion" use_multithreaded = False if use_multithreaded: my_thread_pool = ThreadPool(50) results = my_thread_pool.imap_unordered( call_dandelion_on_article, my_dandelions) my_thread_pool.close() my_thread_pool.join() my_thread_pool.terminate() else: results = [] for my_dandelion in my_dandelions: results.append(call_dandelion_on_article(my_dandelion)) try: for (my_result, my_error, rate_limit_exceeded) in results: if rate_limit_exceeded: print "sleeping for a few minutes because rate_limit_exceeded", my_error sleep(60 * 5) except Exception as e: print e
def multi_thread_do_job(l, func=do_job, size=threads_number):
    """Apply `func` to every element of `l` on a thread pool.

    :param l: iterable of the items to process.
    :param func: callable invoked with each item (defaults to `do_job`).
    :param size: number of worker threads (defaults to `threads_number`).
    :return: list with the result of `func` for each item, in the order of `l`.
    :raises: whatever `func` raises; the pool is shut down before the
        exception propagates.
    """
    tp = ThreadPool(size)
    try:
        return tp.map(func, l)
    finally:
        # also reached when `func` raises, so worker threads never leak
        tp.close()
        tp.join()
def _mlx_reading_ok(async_result, side):
    """Return True if the MLX90393 probe finished without error and without a 0.0 value."""
    try:
        failed = 0.0 in async_result.get()
    except Exception:
        failed = True
    if failed:
        print('Error in {} tripod sensor(s)'.format(side))
    return not failed


def initialize_sensor(sensor_type):
    """Open the serial port(s) of a sensor, retrying until a probe read succeeds.

    For 'MLX90393' two ports ('/dev/ttyUSB0' and '/dev/ttyUSB1') are opened and
    probed concurrently with `collectData`; a probe fails when it raises or when
    its data contains 0.0. For 'BNO055' one port ('/dev/ttyUSB2') is opened and a
    probe fails when `collectData` raises. After a failed attempt the ports are
    closed and the whole attempt is repeated after one second, without limit.

    :param sensor_type: type of sensor; currently supported: "MLX90393", "BNO055".
    :return: `[ser1, ser2]` for "MLX90393", `ser3` for "BNO055", None for any
        other value.
    """
    if sensor_type == 'MLX90393':
        while True:
            # Set up serial communication with Arduino(s)
            ser1 = serial.Serial('/dev/ttyUSB0', 115200, timeout=2)
            try:
                ser1.flushInput()
                ser2 = serial.Serial('/dev/ttyUSB1', 115200, timeout=2)
            except Exception:
                # do not leave the first port open if the second cannot be opened
                ser1.close()
                raise
            ser2.flushInput()

            pool = ThreadPool(processes=2)
            left = pool.apply_async(collectData, ('MLX90393', 9, ser1))
            right = pool.apply_async(collectData, ('MLX90393', 9, ser2))
            pool.close()
            pool.join()

            # evaluate both sides (no short-circuit) so each failure is reported
            left_ok = _mlx_reading_ok(left, 'left')
            right_ok = _mlx_reading_ok(right, 'right')

            if left_ok and right_ok:
                print('Initialized magnetometers.')
                time.sleep(1)
                return [ser1, ser2]

            print(
                'Failed to initialize one or more magnetometers, trying again...'
            )
            ser1.close()
            ser2.close()
            time.sleep(1)

    elif sensor_type == 'BNO055':
        while True:
            # Set up serial communication with Arduino(s)
            ser3 = serial.Serial('/dev/ttyUSB2', 115200, timeout=2)
            ser3.flushInput()
            try:
                collectData('BNO055', 6, ser3)
            except Exception:
                # `Exception` rather than a bare except: Ctrl-C must be able to
                # break out of this otherwise endless retry loop
                print('Failed to initialize IMU, trying again...')
                ser3.close()
                time.sleep(1)
            else:
                print('Initialized IMU.')
                time.sleep(1)
                return ser3

    # unsupported sensor type: nothing is opened
    return None
def process_callbacks(self, callback_collection, kwargs):
    """
    Processes a collection of callbacks or hooks for a particular event, namely pre, hook or
    post.

    The functions are passed in as an array to ``callback_collection``. For each one, in
    order, the required arguments are checked against the global data and ``kwargs``; if
    any is missing an Exception is raised. Callbacks preceding the failing one have
    already been run (non-threaded) or submitted (threaded) at that point.

    Depending on whether Threading is enabled or not, the functions are either run
    sequentially, or loaded into a ThreadPool of 10 threads and executed asynchronously.
    The returned local and global updates are either collected and processed right after
    each call, as in the case of the non-threaded behaviour, or collected after all
    callbacks have finished and handled there.

    Note: It is impossible to predict the order of the functions being run. If the order is
    important, it is advised to create a second event hook that will be fired before the
    other. Rigger has no concept of hook or callback order and is unlikely to ever have.

    Args:
        callback_collection: A list of dicts, each with a ``func`` callable and an ``args``
            mapping of parameter name to an object exposing ``default``.
        kwargs: A set of kwargs to pass to the functions.

    Returns: A tuple of local and global namespace updates.

    Raises:
        Exception: if a callback requires an argument found neither in the global data
            nor in ``kwargs``. In threaded mode the pool is shut down (waiting for the
            callbacks already submitted) before the exception propagates.
    """
    loc_collect = {}
    glo_collect = {}
    results_list = []
    pool = ThreadPool(10) if self._threaded else None
    try:
        for cb in callback_collection:
            # a parameter whose default is a class (e.g. the "no default" marker of
            # inspect) is treated as required
            required_args = [
                sig for sig in cb['args']
                if isinstance(cb['args'][sig].default, type)
            ]
            missing = list(
                set(required_args) - set(self.global_data) - set(kwargs))
            if missing:
                raise Exception('Function {} is missing kwargs {}'.format(
                    cb['func'].__name__, missing))

            new_kwargs = self.build_kwargs(cb['args'], kwargs)
            if pool is not None:
                results_list.append(
                    pool.apply_async(cb['func'], [], new_kwargs))
            else:
                obtain_result = self.handle_results(
                    cb['func'], [], new_kwargs)
                loc_collect, glo_collect = self.handle_collects(
                    obtain_result, loc_collect, glo_collect)
    finally:
        # previously skipped when a callback was missing kwargs, which leaked the
        # worker threads of the pool
        if pool is not None:
            pool.close()
            pool.join()

    for result in results_list:
        # result.get re-raises inside handle_results whatever the callback raised
        obtain_result = self.handle_results(result.get, [], {})
        loc_collect, glo_collect = self.handle_collects(
            obtain_result, loc_collect, glo_collect)
    return loc_collect, glo_collect
class StationXMLNetworkCombinerTask(CombinerTask):
    """
    Task downloading and combining `StationXML
    <http://www.fdsn.org/xml/station/>`_ information for a network element.
    Downloading is performed concurrently.

    :param list routes: Routes to combine. Must belong to exclusively a single
        network code.
    :param query_params: Query parameters; the :code:`level` entry (default
        :code:`'station'`) selects how downloaded documents are combined.

    :raises ValueError: If the routes refer to more or less than exactly one
        network code.

    .. note::

        *StationXML* :code:`BaseNodeType` elements by definition
        (http://www.fdsn.org/xml/station/fdsn-station-1.0.xsd) are ordered
        using :code:`<xs:sequence></sequence>`. This fact is used when merging
        StationXML :code:`BaseNodeType` elements.
    """
    # TODO(damb): The combiner has to write metadata to the log database.
    # Also in case of errors.
    # Besides of processors this combiner has to log since it is the instance
    # collecting and analyzing DownloadTask results.

    LOGGER = 'flask.app.federator.task_combiner_stationxml'
    POOL_SIZE = 5

    NETWORK_TAG = settings.STATIONXML_ELEMENT_NETWORK
    STATION_TAG = settings.STATIONXML_ELEMENT_STATION
    CHANNEL_TAG = settings.STATIONXML_ELEMENT_CHANNEL

    # seconds to block on a pending download when none finished during a pass
    _POLL_TIMEOUT = 0.05

    def __init__(self, routes, query_params, **kwargs):
        nets = {se.network for route in routes for se in route.streams}

        # input validation: kept as an exception (asserts vanish under -O)
        if len(nets) != 1:
            raise ValueError('Routes must belong exclusively to a single '
                             'network code.')

        super().__init__(routes, query_params, logger=self.LOGGER, **kwargs)
        self._level = self.query_params.get('level', 'station')
        self._network_elements = []
        self.path_tempfile = None

    def _clean(self, result):
        """
        Remove the temporary file referenced by :code:`result.data` unless
        temporary files are configured to be kept. A failing removal
        (:code:`OSError`) is ignored on purpose (best effort).
        """
        self.logger.debug('Removing temporary file {!r} ...'.format(
            result.data))
        if (result.data and self._keep_tempfiles
                not in (KeepTempfiles.ALL, KeepTempfiles.ON_ERRORS)):
            try:
                os.remove(result.data)
            except OSError:
                pass

    def _run(self):
        """
        Combine `StationXML <http://www.fdsn.org/xml/station/>`_
        :code:`<Network></Network>` information.

        One download task per route is submitted to a thread pool; results are
        combined as they become ready and the combined network elements are
        finally dumped to a temporary file.

        :returns: :code:`Result.nocontent` if the recorded sizes sum up to
            zero, else :code:`Result.ok` referring to the temporary file
        :raises MissingContextLock: If an inactive context is detected
        """
        self.logger.info('Executing task {!r} ...'.format(self))
        self._pool = ThreadPool(processes=self._num_workers)

        for route in self._routes:
            self.logger.debug(
                'Creating DownloadTask for route {!r} ...'.format(route))
            ctx = Context()
            self._ctx.append(ctx)

            t = RawDownloadTask(GranularFdsnRequestHandler(
                route.url, route.streams[0], query_params=self.query_params),
                                decode_unicode=True,
                                context=ctx,
                                keep_tempfiles=self._keep_tempfiles,
                                http_method=self._http_method)

            # apply DownloadTask asynchronoulsy to the worker pool
            result = self._pool.apply_async(t)
            self._results.append(result)

        self._pool.close()

        # fetch results ready
        while True:
            ready = []
            for result in self._results:
                if result.ready():
                    _result = result.get()
                    if _result.status_code == 200:
                        self._combine(_result.data)
                        self._clean(_result)
                        self._sizes.append(_result.length)
                    else:
                        self._handle_error(_result)
                        self._sizes.append(0)

                    ready.append(result)

            for result in ready:
                self._results.remove(result)

            if not self._results:
                break

            if self._has_inactive_ctx():
                self.logger.debug('{}: Closing ...'.format(self.name))
                self._terminate()
                raise self.MissingContextLock

            if not ready:
                # Nothing finished during this pass: block briefly on a
                # pending download instead of spinning at full CPU speed.
                self._results[0].wait(self._POLL_TIMEOUT)

        self._pool.join()

        if not sum(self._sizes):
            self.logger.warning(
                'Task {!r} terminates with no valid result.'.format(self))
            return Result.nocontent(extras={'type_task': self._TYPE})

        _length = 0
        # dump xml tree for <Network></Network> epochs to temporary file
        self.path_tempfile = get_temp_filepath()
        self.logger.debug('{}: tempfile={!r}'.format(self, self.path_tempfile))
        with open(self.path_tempfile, 'wb') as ofd:
            for net_element in self._network_elements:
                s = etree.tostring(net_element)
                _length += len(s)
                ofd.write(s)

        if self._has_inactive_ctx():
            raise self.MissingContextLock

        self.logger.info(
            ('Task {!r} sucessfully finished '
             '(total bytes processed: {}, after processing: {}).').format(
                 self, sum(self._sizes), _length))

        return Result.ok(data=self.path_tempfile,
                         length=_length,
                         extras={'type_task': self._TYPE})

    def _combine(self, path_xml):
        """
        Fold the network epochs of the StationXML file at :code:`path_xml`
        into the already collected network elements, according to the level.
        """
        if self._level == 'network':
            for net_element in self._extract_net_elements(path_xml):
                self._emerge_net_element(net_element)
            return

        if self._level not in ('channel', 'response', 'station'):
            return

        # 'channel'/'response': merge <Station></Station> elements (down to
        # <Channel></Channel>) into the correct <Network></Network> epoch.
        # 'station': simply append; no merging is performed, i.e. stations
        # defined by multiple EIDA nodes show up multiple times.
        merge_stations = self._level != 'station'
        station_tags = [
            '{}{}'.format(ns, self.STATION_TAG)
            for ns in settings.STATIONXML_NAMESPACES
        ]

        for _net_element in self._extract_net_elements(path_xml):
            # find the correct <Network></Network> epoch element
            net_element, known = self._emerge_net_element(
                _net_element, exclude_tags=station_tags)

            if not known:
                # registered as is, i.e. including its station elements
                continue

            for sta_element in self._emerge_sta_elements(_net_element):
                if merge_stations:
                    self._merge_sta_element(net_element, sta_element)
                else:
                    net_element.append(sta_element)

    def _emerge_net_element(self, net_element, exclude_tags=None):
        """
        Emerge a :code:`<Network></Network>` epoch element. If the
        :code:`<Network></Network>` element is unknown it is automatically
        appended to the list of already existing network elements.

        :param net_element: Emerge a network epoch element
        :type net_element: :py:class:`lxml.etree.Element`
        :param list exclude_tags: List of child element tags to be excluded
            while comparing (default: no tag is excluded)

        :returns: Tuple of :code:`net_element` or a reference to an already
            existing network epoch element and a boolean value if the network
            element already is known (:code:`True`) else :code:`False`
        :rtype: tuple
        """
        if exclude_tags is None:
            exclude_tags = []

        for existing_net_element in self._network_elements:
            if elements_equal(net_element,
                              existing_net_element,
                              exclude_tags,
                              recursive=True):
                return existing_net_element, True

        self._network_elements.append(net_element)
        return net_element, False

    def _emerge_sta_elements(self,
                             net_element,
                             namespaces=settings.STATIONXML_NAMESPACES):
        """
        Generator function emerging :code:`<Station><Station>` elements from
        :code:`<Network></Network>` tree.

        :param net_element: Network epoch `StationXML
            <http://www.fdsn.org/xml/station/>`_ element
        :type net_element: :py:class:`lxml.etree.Element`
        :param list namespaces: List of XML namespaces to be taken into
            consideration.
        """
        for ns in namespaces:
            tag = '{}{}'.format(ns, self.STATION_TAG)
            for sta_element in net_element.findall(tag):
                yield sta_element

    def _emerge_cha_elements(self,
                             sta_element,
                             namespaces=settings.STATIONXML_NAMESPACES):
        """
        Generator function emerging :code:`<Channel><Channel>` elements from
        :code:`<Station></Station>` tree.

        :param sta_element: Station epoch element
        :param list namespaces: List of XML namespaces to be taken into
            consideration.
        """
        for ns in namespaces:
            tag = '{}{}'.format(ns, self.CHANNEL_TAG)
            for cha_element in sta_element.findall(tag):
                yield cha_element

    def _extract_net_elements(self,
                              path_xml,
                              namespaces=settings.STATIONXML_NAMESPACES):
        """
        Extract :code:`<Network></Network>` epoch elements from `StationXML
        <http://www.fdsn.org/xml/station/>`_.

        :param str path_xml: Path to `StationXML
            <http://www.fdsn.org/xml/station/>`_ file.
        :param list namespaces: List of XML namespaces to be taken into
            consideration.

        :returns: List of network epoch elements
        :rtype: list
        """
        network_tags = [
            '{}{}'.format(ns, self.NETWORK_TAG) for ns in namespaces
        ]

        with open(path_xml, 'rb') as ifd:
            station_xml = etree.parse(ifd).getroot()
            return list(station_xml.iter(*network_tags))

    def _merge_sta_element(self,
                           net_element,
                           sta_element,
                           namespaces=settings.STATIONXML_NAMESPACES):
        """
        Merges a *StationXML* :code:`<Station></Station>` epoch element into a
        :code:`<Network></Network>` epoch element. If an equal station epoch
        (channels not compared) already exists, the channels of
        :code:`sta_element` are appended to it; otherwise
        :code:`sta_element` itself is appended to :code:`net_element`.
        """
        channel_tags = [
            '{}{}'.format(ns, self.CHANNEL_TAG) for ns in namespaces
        ]

        # XXX(damb): Check if <Station></Station> epoch element is already
        # available - if not simply append.
        for _sta_element in net_element.iterfind(sta_element.tag):
            if elements_equal(sta_element,
                              _sta_element,
                              exclude_tags=channel_tags,
                              recursive=False):
                # XXX(damb): Channels are ALWAYS appended; no merging is
                # performed
                for _cha_element in self._emerge_cha_elements(
                        sta_element, namespaces):
                    _sta_element.append(_cha_element)
                break
        else:
            # no matching station epoch found
            net_element.append(sta_element)
def pings(self, netlocs):
    """Ping every network location concurrently, one worker thread per location.

    :param netlocs: sized sequence of network locations, each passed to `self.ping`.
    :return: `zip(netlocs, results)` pairing each location with what `self.ping`
        returned for it; empty when `netlocs` is empty.
    :raises: whatever `self.ping` raises; the pool is shut down first.
    """
    if len(netlocs) == 0:
        # ThreadPool refuses a size of 0 (ValueError): nothing to ping
        return zip(netlocs, [])

    pool = ThreadPool(processes=len(netlocs))
    try:
        rt_secs = pool.map(self.ping, netlocs, chunksize=1)
    finally:
        # also reached when a ping raises, so worker threads never leak
        pool.close()
        pool.join()
    return zip(netlocs, rt_secs)
def _set_cross_validation_defaults(params):
    """Overwrite on `params` the fixed settings used for every cross validation run."""
    params.model_architecture = "h-rnn-rnn"
    params.predictions_filename = 'predictions.txt'
    params.eval_batch_size = 2
    params.predict_batch_size = 2
    params.save_trans_params = True
    params.ckpt = None

    # Other
    params.gpu = None
    params.random_seed = None
    params.log_device_placement = False
    params.timeline = False

    # optimizer
    params.learning_rate = 0.01
    params.optimizer = 'adam'
    params.colocate_gradients_with_ops = True
    params.start_decay_step = 0
    params.decay_steps = 10000
    params.decay_factor = 0.98
    params.max_gradient_norm = 5.0

    # training
    params.batch_size = 2
    params.num_epochs = 10
    params.num_ckpt_epochs = 1

    # network
    params.init_op = 'uniform'
    params.init_weight = 0.1
    params.uttr_time_major = False
    params.sess_time_major = False
    params.input_emb_trainable = True
    params.out_bias = True
    params.forget_bias = 1.0
    params.connect_inp_to_out = False
    params.uttr_activation = "relu"
    params.sess_activation = "relu"

    # cnn
    params.filter_sizes = '3,4'
    params.num_filters = 10
    params.pool_size = 1
    params.padding = 'valid'
    params.stride = 1

    # network
    params.uttr_layers = 1
    params.sess_layers = 1
    params.uttr_rnn_type = 'uni'
    params.sess_rnn_type = 'uni'
    params.uttr_unit_type = 'gru'
    params.sess_unit_type = 'gru'
    params.uttr_pooling = 'last'
    params.uttr_attention_size = 32
    params.input_emb_size = 300
    params.out_dir = 'experiments/out_model/splits'
    params.n_classes = 27
    params.hparams_path = None

    # What symbols to use for unk and pad.
    params.unk = '<unk>'
    params.pad = '<pad>'
    params.feature_size = 12624
    params.data_folder = 'experiments/data/splits'
    params.n_jobs = 6


def cross_validation(params, n_processes):
    """Run a grid-search cross validation and print the scores of each combination.

    `params` is first overwritten with fixed settings, then each combination of the
    hyper-parameter grid is evaluated with `run_cross_validate` on a thread pool.
    The averaged loss / accuracy / F1 / precision / recall of every combination
    that finished are printed, followed by the best combination per metric
    (lowest loss, highest value for the other metrics).

    Garbage collection is disabled for the duration of the call and re-enabled
    on exit, also when an exception is raised.

    :param params: mutable parameter object; MODIFIED in place.
    :param n_processes: number of worker threads of the pool.
    :return: None.
    """
    gc.disable()
    try:
        _set_cross_validation_defaults(params)

        nn_params = {
            "uttr_units": [20, 50],
            "sess_units": [None],
            "uttr_hid_to_out_dropout": [2],
            "sess_hid_to_out_dropout": [None, 10, 20]
        }
        param_combs = list(ParameterGrid(nn_params))

        print("\n")
        print("Run Cross validation for model %s and %d param combinations." %
              (params.model_architecture, len(param_combs)))
        print("\n")

        def cross_validate_comb(params, tr_val_labels, comb, i):
            print("Run cross validation for params: %s" % comb)
            # NOTE(review): every worker thread writes to the very same `params`
            # object, so with n_processes > 1 concurrently running combinations
            # can overwrite each other's settings. Give each task its own copy
            # once it is confirmed that `params` can be copied safely.
            params.uttr_units = comb["uttr_units"]
            params.sess_units = comb["sess_units"]
            params.uttr_hid_to_out_dropout = comb["uttr_hid_to_out_dropout"]
            params.sess_hid_to_out_dropout = comb["sess_hid_to_out_dropout"]

            avg_loss, avg_acc, avg_f1, avg_pr, avg_rc = \
                src.train.train.cross_validate_helper.run_cross_validate(
                    params, tr_val_labels)
            results = {
                "avg_loss": avg_loss,
                "avg_acc": avg_acc,
                "avg_f1": avg_f1,
                "avg_pr": avg_pr,
                "avg_rc": avg_rc,
            }
            return results, i

        # slot i holds the scores of combination i, None until (unless) it finishes
        results = [None] * len(param_combs)

        def store_result(payload):
            scores, i = payload
            results[i] = scores

        def callback_error(result):
            print('error', result)

        pool = ThreadPool(processes=n_processes)
        start_time_cv = time.time()
        try:
            for i, comb in enumerate(param_combs):
                try:
                    # `tr_val_labels` is not defined in this function: it is
                    # looked up in the enclosing (module) scope
                    pool.apply_async(cross_validate_comb,
                                     args=(params, tr_val_labels, comb, i),
                                     callback=store_result,
                                     error_callback=callback_error)
                except Exception as exc:
                    traceback.print_tb(exc.__traceback__)
        finally:
            pool.close()
            pool.join()
        print("Cross validation finished in %f secs" %
              (time.time() - start_time_cv))

        # A failed combination has no scores (its error was already printed by
        # `callback_error`): leave it out instead of failing with KeyError.
        completed = [(i, res) for i, res in enumerate(results) if res]
        for i, res in completed:
            print("Loss for comb %d: %.3f" % (i, res["avg_loss"]))
            print("Accuracy score for comb %d: %.3f" % (i, res["avg_acc"]))
            print("F1 score for comb %d: %.3f" % (i, res["avg_f1"]))
            print("Precision for comb %d: %.3f" % (i, res["avg_pr"]))
            print("Recall for comb %d: %.3f" % (i, res["avg_rc"]))

        if not completed:
            print("No param combination finished successfully.")
            return

        comb_idx = [i for i, _ in completed]
        loss_cv = [res["avg_loss"] for _, res in completed]
        acc_cv = [res["avg_acc"] for _, res in completed]
        f1_cv = [res["avg_f1"] for _, res in completed]
        pr_cv = [res["avg_pr"] for _, res in completed]
        rc_cv = [res["avg_rc"] for _, res in completed]

        # the best loss is the LOWEST one (was argmax, i.e. the worst)
        loss_min_idx = np.argmin(loss_cv)
        acc_max_idx = np.argmax(acc_cv)
        f1_max_idx = np.argmax(f1_cv)
        pr_max_idx = np.argmax(pr_cv)
        rc_max_idx = np.argmax(rc_cv)

        print("Min Loss score: %.3f for params %s" %
              (loss_cv[loss_min_idx], param_combs[comb_idx[loss_min_idx]]))
        print("Max Accuracy score: %.3f for params %s" %
              (acc_cv[acc_max_idx], param_combs[comb_idx[acc_max_idx]]))
        print("Max F1 score: %.3f for params %s" %
              (f1_cv[f1_max_idx], param_combs[comb_idx[f1_max_idx]]))
        print("Max Precision: %.3f for params %s" %
              (pr_cv[pr_max_idx], param_combs[comb_idx[pr_max_idx]]))
        print("Max Recall: %.3f for params %s" %
              (rc_cv[rc_max_idx], param_combs[comb_idx[rc_max_idx]]))
    finally:
        # never leave the garbage collector disabled, whatever happened above
        gc.enable()
def get_routemanagers(self):
    """Build a route manager for every configured area and calculate its route.

    For each entry of ``self.__raw_json["areas"]`` the geofence files are
    validated, a route manager matching the area's ``mode`` is created, the
    coordinates are collected (from the DB or generated inside the geofence)
    and the route calculation is submitted to a small thread pool. The method
    only returns once every submitted calculation has finished.

    Returns:
        dict: area name -> dict with the keys ``mode``, ``geofence_included``,
        ``geofence_excluded``, ``routecalc`` and ``routemanager``.

    Raises:
        RuntimeError: if an area has ``geofence_included`` set to ``None``.
        SystemExit: (exit code 1) if a configured geofence file does not
            exist or the area's mode is unknown.
        Exception: whatever a route calculation raised in its worker thread
            is re-raised here by ``AsyncResult.get()``.
    """
    from multiprocessing.pool import ThreadPool
    global mode_mapping

    # maps the area name to its description dict (including the routemanager)
    areas = {}
    area_arr = self.__raw_json["areas"]

    thread_pool = ThreadPool(processes=4)

    # maps the area name to the AsyncResult of its pending route calculation
    areas_procs = {}
    for area in area_arr:
        if area["geofence_included"] is None:
            raise RuntimeError("Cannot work without geofence_included")

        geofence_included = Path(area["geofence_included"])
        if not geofence_included.is_file():
            log.error("Geofence included file configured does not exist")
            sys.exit(1)

        geofence_excluded_raw_path = area.get("geofence_excluded", None)
        if geofence_excluded_raw_path is not None:
            geofence_excluded = Path(geofence_excluded_raw_path)
            if not geofence_excluded.is_file():
                log.error("Geofence excluded specified but does not exist")
                sys.exit(1)

        area_dict = {
            "mode": area["mode"],
            "geofence_included": area["geofence_included"],
            "geofence_excluded": area.get("geofence_excluded", None),
            "routecalc": area["routecalc"]
        }
        # also build a routemanager for each area...

        # grab coords
        # first check if init is false or raids_ocr is set as mode, if so, grab the coords from DB
        # coords = np.loadtxt(area["coords"], delimiter=',')
        geofence_helper = GeofenceHelper(
            area["geofence_included"], area.get("geofence_excluded", None))
        mode = area["mode"]
        # build routemanagers
        if mode == "raids_ocr" or mode == "raids_mitm":
            route_manager = RouteManagerRaids(
                self.db_wrapper, None,
                mode_mapping[area["mode"]]["range"],
                mode_mapping[area["mode"]]["max_count"],
                area["geofence_included"],
                area.get("geofence_excluded", None),
                area["routecalc"],
                mode=area["mode"],
                settings=area.get("settings", None),
                init=area.get("init", False),
                name=area.get("name", "unknown"))
        elif mode == "mon_mitm":
            route_manager = RouteManagerMon(
                self.db_wrapper, None,
                mode_mapping[area["mode"]]["range"],
                mode_mapping[area["mode"]]["max_count"],
                area["geofence_included"],
                area.get("geofence_excluded", None),
                area["routecalc"],
                mode=area["mode"],
                coords_spawns_known=area.get("coords_spawns_known", False),
                init=area.get("init", False),
                name=area.get("name", "unknown"),
                settings=area.get("settings", None))
        elif mode == "iv_mitm":
            route_manager = RouteManagerIV(
                self.db_wrapper, None, 0, 999999,
                area["geofence_included"],
                area.get("geofence_excluded", None),
                area["routecalc"],
                name=area.get("name", "unknown"),
                settings=area.get("settings", None),
                mode=mode)
        elif mode == "pokestops":
            route_manager = RouteManagerMon(
                self.db_wrapper, None,
                mode_mapping[area["mode"]]["range"],
                mode_mapping[area["mode"]]["max_count"],
                area["geofence_included"],
                area.get("geofence_excluded", None),
                area["routecalc"],
                mode=area["mode"],
                init=area.get("init", False),
                name=area.get("name", "unknown"),
                settings=area.get("settings", None))
        else:
            log.error("Invalid mode found in mapping parser.")
            sys.exit(1)

        # iv_mitm areas get neither a coordinate list nor a route calculation here
        if not mode == "iv_mitm":
            if mode == "raids_ocr" or area.get("init", False) is False:
                # grab data from DB depending on mode
                # TODO: move routemanagers to factory
                if mode == "raids_ocr" or mode == "raids_mitm":
                    coords = self.db_wrapper.gyms_from_db(geofence_helper)
                elif mode == "mon_mitm":
                    spawn_known = area.get("coords_spawns_known", False)
                    if spawn_known:
                        log.info("Reading known Spawnpoints from DB")
                        coords = self.db_wrapper.get_detected_spawns(
                            geofence_helper)
                    else:
                        log.info("Reading unknown Spawnpoints from DB")
                        coords = self.db_wrapper.get_undetected_spawns(
                            geofence_helper)
                elif mode == "pokestops":
                    coords = self.db_wrapper.stops_from_db(geofence_helper)
                else:
                    log.fatal("Mode not implemented yet: %s" % str(mode))
                    # sys.exit, like every other failure branch: the bare
                    # ``exit`` builtin is injected by ``site`` and may be absent
                    sys.exit(1)
            else:
                # calculate all level N cells (mapping back from mapping above linked to mode)
                # coords = S2Helper.get_s2_cells_from_fence(geofence=geofence_helper,
                #                                           cell_size=mode_mapping[mode]["s2_cell_level"])
                coords = S2Helper._generate_locations(
                    mode_mapping[area["mode"]]["range"], geofence_helper)

            route_manager.add_coords_list(coords)
            max_radius = mode_mapping[area["mode"]]["range"]
            max_count_in_radius = mode_mapping[area["mode"]]["max_count"]
            if not area.get("init", False):
                log.info("Calculating route for %s"
                         % str(area.get("name", "unknown")))
                proc = thread_pool.apply_async(
                    route_manager.recalc_route,
                    args=(max_radius, max_count_in_radius, 0, False))
                areas_procs[area["name"]] = proc
            else:
                log.info(
                    "Init mode enabled and more than 400 coords in init. Going row-based for %s"
                    % str(area.get("name", "unknown")))
                # we are in init, let's write the init route to file to make it visible in madmin
                if area["routecalc"] is not None:
                    routefile = area["routecalc"]
                    if os.path.isfile(routefile + '.calc'):
                        os.remove(routefile + '.calc')
                    with open(routefile + '.calc', 'a') as f:
                        for loc in coords:
                            f.write(
                                str(loc.lat) + ', ' + str(loc.lng) + '\n')
                # gotta feed the route to routemanager... TODO: without recalc...
                proc = thread_pool.apply_async(
                    route_manager.recalc_route,
                    args=(1, 99999999, 0, False))
                areas_procs[area["name"]] = proc
        # log.error("Calculated route, appending another coord and recalculating")

        area_dict["routemanager"] = route_manager
        areas[area["name"]] = area_dict

    # block until every submitted calculation is done; get() re-raises
    # any exception raised inside the worker thread
    for to_be_checked in areas_procs.values():
        log.debug(to_be_checked)
        to_be_checked.get()

    thread_pool.close()
    thread_pool.join()
    return areas
class CmdUpload(object):
    """ This class is responsible for uploading packages to remotes. The flow is:
        - Collect all the data from the local cache:
            - Collect the refs that match the given pattern "_collects_refs_to_upload"
            - Collect for every ref all the binaries IDs that have to be uploaded
              "_collect_packages_to_upload". This may discard binaries that do not
              belong to the current RREV
            The collection of this does the interactivity (ask user if yes/no),
            the errors (don't upload packages with policy=build_always), and computing
            the full REVISIONS for everything that has to be uploaded.
            No remote API calls are done in this step, everything is local
        - Execute the upload. For every ref:
            - Upload the recipe of the ref: "_upload_recipe"
                - If not FORCE, check the date "_check_recipe_date", i.e. if there are
                  changes, do not allow uploading if the remote date is newer than the
                  local cache one
                - Retrieve the sources (exports_sources), if they are not cached, and
                  uploading to a different remote. "complete_recipe_sources"
                - Gather files and create 2 .tgz (exports, exports_sources) with
                  "_compress_recipe_files"
                - Decide which files have to be uploaded and deleted from the server
                  based on the difference with the remote snapshot "_recipe_files_to_upload"
                  This can raise if upload policy is not overwrite
                - Execute the real transfer "remote_manager.upload_recipe()"
            - For every package_id of every ref: "_upload_package"
                - Gather files and create package.tgz. "_compress_package_files"
                - (Optional) Do the integrity check of the package
                - Decide which files to upload and delete from server:
                  "_package_files_to_upload". Can raise if policy is NOT overwrite
                - Do the actual upload

        All the REVISIONS are local defined, not retrieved from servers

        This requires calling to the remote API methods:
        - get_recipe_sources() to get the export_sources if they are missing
        - get_recipe_snapshot() to do the diff and know what files to upload
        - get_package_snapshot() to do the diff and know what files to upload
        - get_recipe_manifest() to check the date and raise if policy requires
        - get_package_manifest() to raise if policy!=force and manifests change
    """

    def __init__(self, cache, user_io, remote_manager, loader, hook_manager):
        self._cache = cache
        self._user_io = user_io
        self._output = progress_bar.ProgressOutput(self._user_io.out)
        self._remote_manager = remote_manager
        self._loader = loader
        self._hook_manager = hook_manager
        # Created by upload(); recipes and packages are both dispatched through it
        self._upload_thread_pool = None
        # Errors collected by the worker threads, reported at the end of upload()
        self._exceptions_list = []

    def upload(self, reference_or_pattern, remotes, upload_recorder, package_id=None,
               all_packages=None, confirm=False, retry=None, retry_wait=None,
               integrity_check=False, policy=None, query=None, parallel_upload=False):
        """ Upload the recipes (and optionally binaries) matching reference_or_pattern.

        Uses a pool of 8 threads when parallel_upload is set (user input is disabled
        in that case), a single worker thread otherwise. Errors collected while
        uploading are printed once everything has finished and then a single
        ConanException is raised.
        """
        t1 = time.time()
        # Start from a clean slate, otherwise a reused instance would report (and
        # fail because of) the errors of a previous upload() call
        self._exceptions_list = []

        refs, confirm = self._collects_refs_to_upload(package_id, reference_or_pattern, confirm)
        refs_by_remote = self._collect_packages_to_upload(refs, confirm, remotes, all_packages,
                                                          query, package_id)

        if parallel_upload:
            self._upload_thread_pool = ThreadPool(8)
            self._user_io.disable_input()
        else:
            self._upload_thread_pool = ThreadPool(1)

        for remote, refs in refs_by_remote.items():
            self._output.info("Uploading to remote '{}':".format(remote.name))

            def upload_ref(ref_conanfile_prefs):
                _ref, _conanfile, _prefs = ref_conanfile_prefs
                self._upload_ref(_conanfile, _ref, _prefs, retry, retry_wait,
                                 integrity_check, policy, remote, upload_recorder, remotes)

            # map() blocks until all the recipes of this remote are processed, so the
            # closure above always sees the "remote" of the current iteration
            self._upload_thread_pool.map(upload_ref, refs)

        # The package uploads were queued with map_async(), wait for them here
        self._upload_thread_pool.close()
        self._upload_thread_pool.join()

        for exception in self._exceptions_list:
            self._output.error(str(exception))

        if self._exceptions_list:
            raise ConanException("Errors uploading some packages")

        logger.debug("UPLOAD: Time manager upload: %f" % (time.time() - t1))

    def _collects_refs_to_upload(self, package_id, reference_or_pattern, confirm):
        """ validate inputs and compute the refs (without revisions) to be uploaded

        Returns the tuple (refs, confirm); confirm is forced to True when a single
        explicit reference is given
        """
        if package_id and not check_valid_ref(reference_or_pattern, strict_mode=False):
            raise ConanException("-p parameter only allowed with a valid recipe reference, "
                                 "not with a pattern")

        if package_id or check_valid_ref(reference_or_pattern):
            # Upload package
            ref = ConanFileReference.loads(reference_or_pattern)
            if ref.revision and not self._cache.config.revisions_enabled:
                raise ConanException("Revisions not enabled in the client, specify a "
                                     "reference without revision")
            refs = [ref, ]
            confirm = True
        else:
            refs = search_recipes(self._cache, reference_or_pattern)
            if not refs:
                raise NotFoundException(("No packages found matching pattern '%s'" %
                                         reference_or_pattern))
        return refs, confirm

    def _collect_packages_to_upload(self, refs, confirm, remotes, all_packages, query,
                                    package_id):
        """ compute the references with revisions and the package_ids to be uploaded

        Returns a dict: remote -> list of (ref, conanfile, prefs) tuples
        """
        # Group recipes by remote
        refs_by_remote = defaultdict(list)

        for ref in refs:
            metadata = self._cache.package_layout(ref).load_metadata()
            if ref.revision and ref.revision != metadata.recipe.revision:
                raise ConanException("Recipe revision {} does not match the one stored in the cache {}"
                                     .format(ref.revision, metadata.recipe.revision))
            ref = ref.copy_with_rev(metadata.recipe.revision)
            remote = remotes.selected
            if remote:
                ref_remote = remote
            else:
                ref_remote = metadata.recipe.remote
                ref_remote = remotes.get_remote(ref_remote)

            upload = True
            if not confirm:
                msg = "Are you sure you want to upload '%s' to '%s'?" % (str(ref), ref_remote.name)
                upload = self._user_io.request_boolean(msg)
            if upload:
                try:
                    conanfile_path = self._cache.package_layout(ref).conanfile()
                    conanfile = self._loader.load_basic(conanfile_path)
                except NotFoundException:
                    raise NotFoundException(("There is no local conanfile exported as %s" %
                                             str(ref)))

                # TODO: This search of binary packages has to be improved, more robust
                # So only real packages are retrieved
                if all_packages or query:
                    if all_packages:
                        query = None
                    # better to do a search, that will retrieve real packages with ConanInfo
                    # Not only "package_id" folders that could be empty
                    package_layout = self._cache.package_layout(ref.copy_clear_rev())
                    packages = search_packages(package_layout, query)
                    packages_ids = list(packages.keys())
                elif package_id:
                    packages_ids = [package_id, ]
                else:
                    packages_ids = []
                if packages_ids:
                    if conanfile.build_policy == "always":
                        raise ConanException("Conanfile '%s' has build_policy='always', "
                                             "no packages can be uploaded" % str(ref))
                prefs = []
                # Gather all the complete PREFS with PREV
                for package in packages_ids:
                    # Use a dedicated local name, so the "package_id" argument is
                    # not overwritten for the following refs
                    pkg_id, prev = package.split("#") if "#" in package else (package, None)
                    if pkg_id not in metadata.packages:
                        raise ConanException("Binary package %s:%s not found"
                                             % (str(ref), pkg_id))
                    if prev and prev != metadata.packages[pkg_id].revision:
                        raise ConanException("Binary package %s:%s#%s not found"
                                             % (str(ref), pkg_id, prev))
                    # Filter packages that don't match the recipe revision
                    if self._cache.config.revisions_enabled and ref.revision:
                        rec_rev = metadata.packages[pkg_id].recipe_revision
                        if ref.revision != rec_rev:
                            self._output.warn("Skipping package '%s', it doesn't belong to the"
                                              " current recipe revision" % pkg_id)
                            continue
                    package_revision = metadata.packages[pkg_id].revision
                    assert package_revision is not None, "PREV cannot be None to upload"
                    prefs.append(PackageReference(ref, pkg_id, package_revision))
                refs_by_remote[ref_remote].append((ref, conanfile, prefs))

        return refs_by_remote

    def _upload_ref(self, conanfile, ref, prefs, retry, retry_wait, integrity_check, policy,
                    recipe_remote, upload_recorder, remotes):
        """ Uploads the recipes and binaries identified by ref

        The recipe is uploaded synchronously; the binaries are queued in the thread
        pool and this method returns without waiting for them. Errors are appended
        to self._exceptions_list instead of being raised.
        """
        assert (ref.revision is not None), "Cannot upload a recipe without RREV"
        conanfile_path = self._cache.package_layout(ref).conanfile()
        # FIXME: I think it makes no sense to specify a remote to "pre_upload"
        # FIXME: because the recipe can have one and the package a different one
        self._hook_manager.execute("pre_upload", conanfile_path=conanfile_path,
                                   reference=ref, remote=recipe_remote)
        msg = "\rUploading %s to remote '%s'" % (str(ref), recipe_remote.name)
        self._output.info(left_justify_message(msg))
        try:
            self._upload_recipe(ref, conanfile, retry, retry_wait, policy, recipe_remote, remotes)
            upload_recorder.add_recipe(ref, recipe_remote.name, recipe_remote.url)
        except ConanException as exc:
            # No binaries are uploaded for a recipe that failed
            self._exceptions_list.append(exc)
            return

        # Now the binaries
        if prefs:
            total = len(prefs)
            p_remote = recipe_remote

            def upload_package_index(index_pref):
                try:
                    index, pref = index_pref
                    up_msg = "\rUploading package %d/%d: %s to '%s'" % (index + 1, total,
                                                                        str(pref.id),
                                                                        p_remote.name)
                    self._output.info(left_justify_message(up_msg))
                    self._upload_package(pref, retry, retry_wait, integrity_check, policy,
                                         p_remote)
                    upload_recorder.add_package(pref, p_remote.name, p_remote.url)
                    return conanfile_path, ref, recipe_remote, None
                except Exception as exc:
                    # Every error has to travel back as a value: an exception escaping
                    # from a map_async() task prevents the callback from running and,
                    # as nobody calls get() on the result, it would be lost silently
                    return None, None, None, exc

            def upload_package_callback(ret):
                for cf_path, r_ref, r_rem, exc in ret:
                    if exc is None:
                        # FIXME: I think it makes no sense to specify a remote to "post_upload"
                        # FIXME: because the recipe can have one and the package a different one
                        self._hook_manager.execute("post_upload", conanfile_path=cf_path,
                                                   reference=r_ref, remote=r_rem)
                    else:
                        self._exceptions_list.append(exc)

            # This doesn't wait for the packages to end, so the function returns
            # and the "pool entry" for the recipe is released
            self._upload_thread_pool.map_async(upload_package_index,
                                               list(enumerate(prefs)),
                                               callback=upload_package_callback)
        else:
            # FIXME: I think it makes no sense to specify a remote to "post_upload"
            # FIXME: because the recipe can have one and the package a different one
            self._hook_manager.execute("post_upload", conanfile_path=conanfile_path,
                                       reference=ref, remote=recipe_remote)

    def _upload_recipe(self, ref, conanfile, retry, retry_wait, policy, remote, remotes):
        """ Compress and upload the recipe files of ref to remote. Returns ref

        With UPLOAD_POLICY_SKIP the files are compressed and checked, but nothing
        is transferred.
        """
        current_remote_name = self._cache.package_layout(ref).load_metadata().recipe.remote

        if remote.name != current_remote_name:
            complete_recipe_sources(self._remote_manager, self._cache, conanfile, ref, remotes)

        conanfile_path = self._cache.package_layout(ref).conanfile()
        self._hook_manager.execute("pre_upload_recipe", conanfile_path=conanfile_path,
                                   reference=ref, remote=remote)

        t1 = time.time()
        the_files = self._compress_recipe_files(ref)
        with self._cache.package_layout(ref).update_metadata() as metadata:
            metadata.recipe.checksums = calc_files_checksum(the_files)

        local_manifest = FileTreeManifest.loads(load(the_files["conanmanifest.txt"]))

        remote_manifest = None
        if policy != UPLOAD_POLICY_FORCE:
            # Check SCM data for auto fields
            if hasattr(conanfile, "scm") and (conanfile.scm.get("url") == "auto" or
                                              conanfile.scm.get("revision") == "auto"):
                raise ConanException("The recipe has 'scm.url' or 'scm.revision' with 'auto' "
                                     "values. Use '--force' to ignore this error or export again "
                                     "the recipe ('conan export' or 'conan create') in a "
                                     "repository with no-uncommitted changes or by "
                                     "using the '--ignore-dirty' option")

            remote_manifest = self._check_recipe_date(ref, remote, local_manifest)

        if policy == UPLOAD_POLICY_SKIP:
            return ref

        files_to_upload, deleted = self._recipe_files_to_upload(ref, policy, the_files, remote,
                                                                remote_manifest, local_manifest)

        if files_to_upload or deleted:
            self._remote_manager.upload_recipe(ref, files_to_upload, deleted, remote, retry,
                                               retry_wait)
            self._upload_recipe_end_msg(ref, remote)
        else:
            self._output.info("Recipe is up to date, upload skipped")
        duration = time.time() - t1
        log_recipe_upload(ref, duration, the_files, remote.name)
        self._hook_manager.execute("post_upload_recipe", conanfile_path=conanfile_path,
                                   reference=ref, remote=remote)

        # The recipe wasn't in the registry or it has changed the revision field only
        if not current_remote_name:
            with self._cache.package_layout(ref).update_metadata() as metadata:
                metadata.recipe.remote = remote.name

        return ref

    def _upload_package(self, pref, retry=None, retry_wait=None, integrity_check=False,
                        policy=None, p_remote=None):
        """ Compress and upload the binary package pref to p_remote

        Returns pref, or None when the policy is UPLOAD_POLICY_SKIP (nothing is
        transferred in that case).
        """
        assert (pref.revision is not None), "Cannot upload a package without PREV"
        assert (pref.ref.revision is not None), "Cannot upload a package without RREV"

        conanfile_path = self._cache.package_layout(pref.ref).conanfile()
        self._hook_manager.execute("pre_upload_package", conanfile_path=conanfile_path,
                                   reference=pref.ref,
                                   package_id=pref.id,
                                   remote=p_remote)

        t1 = time.time()
        the_files = self._compress_package_files(pref, integrity_check)
        with self._cache.package_layout(pref.ref).update_metadata() as metadata:
            metadata.packages[pref.id].checksums = calc_files_checksum(the_files)

        if policy == UPLOAD_POLICY_SKIP:
            return None
        files_to_upload, deleted = self._package_files_to_upload(pref, policy, the_files, p_remote)

        if files_to_upload or deleted:
            self._remote_manager.upload_package(pref, files_to_upload, deleted, p_remote, retry,
                                                retry_wait)
            logger.debug("UPLOAD: Time upload package: %f" % (time.time() - t1))
        else:
            self._output.info("Package is up to date, upload skipped")

        duration = time.time() - t1
        log_package_upload(pref, duration, the_files, p_remote)
        self._hook_manager.execute("post_upload_package", conanfile_path=conanfile_path,
                                   reference=pref.ref, package_id=pref.id, remote=p_remote)

        logger.debug("UPLOAD: Time uploader upload_package: %f" % (time.time() - t1))

        # Record the remote only if the package did not have one already
        metadata = self._cache.package_layout(pref.ref).load_metadata()
        cur_package_remote = metadata.packages[pref.id].remote
        if not cur_package_remote and policy != UPLOAD_POLICY_SKIP:
            with self._cache.package_layout(pref.ref).update_metadata() as metadata:
                metadata.packages[pref.id].remote = p_remote.name

        return pref

    def _compress_recipe_files(self, ref):
        """ Gather the exported recipe files (and sources) of ref and compress them

        Returns whatever the module level _compress_recipe_files() helper returns
        """
        export_folder = self._cache.package_layout(ref).export()

        # A .tgz marked as dirty is removed, so it gets generated again
        for f in (EXPORT_TGZ_NAME, EXPORT_SOURCES_TGZ_NAME):
            tgz_path = os.path.join(export_folder, f)
            if is_dirty(tgz_path):
                self._output.warn("%s: Removing %s, marked as dirty" % (str(ref), f))
                os.remove(tgz_path)
                clean_dirty(tgz_path)

        files, symlinks = gather_files(export_folder)
        if CONANFILE not in files or CONAN_MANIFEST not in files:
            raise ConanException("Cannot upload corrupted recipe '%s'" % str(ref))
        export_src_folder = self._cache.package_layout(ref).export_sources()
        src_files, src_symlinks = gather_files(export_src_folder)
        # This is the module level function, not this method
        the_files = _compress_recipe_files(files, symlinks, src_files, src_symlinks, export_folder,
                                           self._output)
        return the_files

    def _compress_package_files(self, pref, integrity_check):
        """ Gather the files of the binary package pref and compress them

        Raises ConanException if the package folder is dirty or if the conaninfo or
        the manifest are missing. Optionally runs the integrity check first.
        """
        t1 = time.time()
        # existing package, will use short paths if defined
        package_folder = self._cache.package_layout(pref.ref, short_paths=None).package(pref)

        if is_dirty(package_folder):
            raise ConanException("Package %s is corrupted, aborting upload.\n"
                                 "Remove it with 'conan remove %s -p=%s'"
                                 % (pref, pref.ref, pref.id))
        tgz_path = os.path.join(package_folder, PACKAGE_TGZ_NAME)
        if is_dirty(tgz_path):
            self._output.warn("%s: Removing %s, marked as dirty"
                              % (str(pref), PACKAGE_TGZ_NAME))
            os.remove(tgz_path)
            clean_dirty(tgz_path)
        # Get all the files in that directory
        files, symlinks = gather_files(package_folder)

        if CONANINFO not in files or CONAN_MANIFEST not in files:
            logger.error("Missing info or manifest in uploading files: %s" % (str(files)))
            raise ConanException("Cannot upload corrupted package '%s'" % str(pref))

        logger.debug("UPLOAD: Time remote_manager build_files_set : %f" % (time.time() - t1))
        if integrity_check:
            self._package_integrity_check(pref, files, package_folder)
            logger.debug("UPLOAD: Time remote_manager check package integrity : %f"
                         % (time.time() - t1))

        # This is the module level function, not this method
        the_files = _compress_package_files(files, symlinks, package_folder, self._output)
        return the_files

    def _recipe_files_to_upload(self, ref, policy, the_files, remote, remote_manifest,
                                local_manifest):
        """ Decide which recipe files to upload and which ones to delete in the remote

        Returns (files_to_upload, deleted), or (None, None) when the remote manifest
        is equal to the local one. Raises ConanException if they differ and the
        policy forbids to overwrite.
        """
        self._remote_manager.check_credentials(remote)
        remote_snapshot = self._remote_manager.get_recipe_snapshot(ref, remote)
        # Keys are normalized to forward slashes
        files_to_upload = {filename.replace("\\", "/"): path
                           for filename, path in the_files.items()}
        if not remote_snapshot:
            return files_to_upload, set()

        deleted = set(remote_snapshot).difference(the_files)
        if policy != UPLOAD_POLICY_FORCE:
            if remote_manifest is None:
                # This is the weird scenario, we have a snapshot but don't have a manifest.
                # Can be due to concurrency issues, so we can try retrieve it now
                try:
                    remote_manifest, _ = self._remote_manager.get_recipe_manifest(ref, remote)
                except NotFoundException:
                    # This is weird, the manifest still not there, better upload everything
                    self._output.warn("The remote recipe doesn't have the 'conanmanifest.txt' "
                                      "file and will be uploaded: '{}'".format(ref))
                    return files_to_upload, deleted

            if remote_manifest == local_manifest:
                return None, None

            if policy in (UPLOAD_POLICY_NO_OVERWRITE, UPLOAD_POLICY_NO_OVERWRITE_RECIPE):
                raise ConanException("Local recipe is different from the remote recipe. "
                                     "Forbidden overwrite.")

        return files_to_upload, deleted

    def _package_files_to_upload(self, pref, policy, the_files, remote):
        """ Decide which package files to upload and which ones to delete in the remote

        Returns (the_files, deleted), or (None, None) when the remote manifest is
        equal to the local one. Raises ConanException if they differ and the policy
        is UPLOAD_POLICY_NO_OVERWRITE.
        """
        self._remote_manager.check_credentials(remote)
        remote_snapshot = self._remote_manager.get_package_snapshot(pref, remote)

        if remote_snapshot and policy != UPLOAD_POLICY_FORCE:
            if not is_package_snapshot_complete(remote_snapshot):
                return the_files, set()
            remote_manifest, _ = self._remote_manager.get_package_manifest(pref, remote)
            local_manifest = FileTreeManifest.loads(load(the_files["conanmanifest.txt"]))
            if remote_manifest == local_manifest:
                return None, None
            if policy == UPLOAD_POLICY_NO_OVERWRITE:
                raise ConanException("Local package is different from the remote package. Forbidden"
                                     " overwrite.")
        deleted = set(remote_snapshot).difference(the_files)
        return the_files, deleted

    def _upload_recipe_end_msg(self, ref, remote):
        """ Print the "uploaded" message, showing the bintray web URL instead of the API one """
        msg = "\rUploaded conan recipe '%s' to '%s'" % (str(ref), remote.name)
        url = remote.url.replace("https://api.bintray.com/conan", "https://bintray.com")
        msg += ": %s" % url
        self._output.info(left_justify_message(msg))

    def _package_integrity_check(self, pref, files, package_folder):
        """ Compare the stored manifest of the package against the one computed from
        its files, raise ConanException on any mismatch
        """
        # If package has been modified remove tgz to regenerate it
        self._output.rewrite_line("Checking package integrity...")

        # short_paths = None is enough if there exist short_paths
        layout = self._cache.package_layout(pref.ref, short_paths=None)
        read_manifest, expected_manifest = layout.package_manifests(pref)

        if read_manifest != expected_manifest:
            self._output.writeln("")
            diff = read_manifest.difference(expected_manifest)
            for fname, (h1, h2) in diff.items():
                self._output.warn("Mismatched checksum '%s' (manifest: %s, file: %s)"
                                  % (fname, h1, h2))

            if PACKAGE_TGZ_NAME in files:
                # Best effort: failing to remove the stale tgz must not hide the
                # integrity error raised below
                try:
                    tgz_path = os.path.join(package_folder, PACKAGE_TGZ_NAME)
                    os.unlink(tgz_path)
                except Exception:
                    pass
            error_msg = os.linesep.join("Mismatched checksum '%s' (manifest: %s, file: %s)"
                                        % (fname, h1, h2) for fname, (h1, h2) in diff.items())
            logger.error("Manifests doesn't match!\n%s" % error_msg)
            raise ConanException("Cannot upload corrupted package '%s'" % str(pref))
        else:
            self._output.rewrite_line("Package integrity OK!")
        self._output.writeln("")

    def _check_recipe_date(self, ref, remote, local_manifest):
        """ Raise ConanException if the remote recipe is different and newer than the
        local one. Returns the remote manifest, or None if the remote has none
        """
        try:
            remote_recipe_manifest, ref = self._remote_manager.get_recipe_manifest(ref, remote)
        except NotFoundException:
            return  # First time uploading this package

        if (remote_recipe_manifest != local_manifest and
                remote_recipe_manifest.time > local_manifest.time):
            self._print_manifest_information(remote_recipe_manifest, local_manifest, ref, remote)
            raise ConanException("Remote recipe is newer than local recipe: "
                                 "\n Remote date: %s\n Local date: %s" %
                                 (remote_recipe_manifest.time, local_manifest.time))

        return remote_recipe_manifest

    def _print_manifest_information(self, remote_recipe_manifest, local_manifest, ref, remote):
        """ Print both manifests and, if conanfile.py differs, the line endings used by
        the local and the remote file. Never raises, this is only informative
        """
        try:
            self._output.info("\n%s" % ("-" * 40))
            self._output.info("Remote manifest:")
            self._output.info(remote_recipe_manifest)
            self._output.info("Local manifest:")
            self._output.info(local_manifest)
            difference = remote_recipe_manifest.difference(local_manifest)
            if "conanfile.py" in difference:
                contents = load(self._cache.package_layout(ref).conanfile())
                endlines = "\\r\\n" if "\r\n" in contents else "\\n"
                self._output.info("Local 'conanfile.py' using '%s' line-ends" % endlines)
                remote_contents = self._remote_manager.get_recipe_path(ref, path="conanfile.py",
                                                                       remote=remote)
                endlines = "\\r\\n" if "\r\n" in remote_contents else "\\n"
                self._output.info("Remote 'conanfile.py' using '%s' line-ends" % endlines)
            self._output.info("\n%s" % ("-" * 40))
        except Exception as e:
            self._output.info("Error printing information about the diff: %s" % str(e))
def index_images(paths, aspect_ratio, height, width, nchannels=3,
                 vectorization_scaling_factor=1, index_class=faiss.IndexFlatL2,
                 verbose=1, caching=True, use_detect_faces=False, nprocesses=4):
    """
    Load and vectorize a set of images, add the vectors to an ANN index and
    produce a resized tile for every indexed image.

    @param: paths (list of Strings OR glob pattern string) image paths to load
    @param: aspect_ratio (float) height / width
    @param: height (int) desired height of tile images
    @param: width (int) desired width of tile images
    @param: nchannels (int) number of channels in image
    @param: vectorization_scaling_factor (float) the factor to multiply by for the vectorization
        values smaller than 1 will save memory space at the cost of quality of matches
        because the image will be downsized before vectorization
    @param: index_class (Faiss Index class) the ANN class to lookup codebook images with
    @param: verbose (int/bool) when truthy, print timing and progress information
    @param: caching (bool) try to load a previously cached index first and
        save the freshly built one afterwards
    @param: use_detect_faces (bool) only keep images whose `faces` attribute is non-empty
    @param: nprocesses (int) number of worker threads used to load the images

    @return: tuple (index, images, tile_images); (None, None, None) when face
        filtering leaves no image or when any exception occurs (the traceback
        is printed in that case)
    """
    try:
        # index our images
        vectorization_dimensionality = int(height * width * nchannels * vectorization_scaling_factor)
        index = index_class(vectorization_dimensionality)

        # create our pool and go!
        starttime = time.time()
        if isinstance(paths, str):
            # paths is a glob pattern like: 'images/blah/*.jpg'
            paths = glob.glob(paths)

        # should we retrieve a cached index?
        if caching:
            print("Caching is ON, checking for previously cached index...")
            cache = MosaicCacheConfig(
                paths=paths,
                height=height,
                width=width,
                nchannels=nchannels,
                index_class=index_class,
                dimensions=vectorization_dimensionality,
                detect_faces=use_detect_faces)
            cached = cache.load()
            if cached is not None:
                print("Found cached index, reading from disk...")
                return cached['index'], cached['images'], cached['tile_images']
            else:
                print("No cached index found, creating from scratch...")

        # nothing cached, let's index
        path_jobs = [(p, height, width, nchannels, aspect_ratio, use_detect_faces)
                     for p in paths]

        pool = ThreadPool(nprocesses)
        try:
            results = pool.map(load_and_vectorize_image, path_jobs)
        finally:
            # always release the worker threads, even if loading failed
            pool.close()
            pool.join()

        # how fast did we go?
        elapsed = time.time() - starttime
        if verbose:
            # max(..., 1) avoids a division by zero when no path matched
            print("Indexing: %d images, %.4f seconds (%.4f per image)" % (
                len(path_jobs), elapsed, elapsed / max(len(path_jobs), 1)))

        # get the results, store in ordered (indexed) list
        images = []
        vectors = []
        for image, vector in results:
            if image is not None and vector is not None:
                if use_detect_faces and not image.faces:
                    # if we're told to use faces, skip any images
                    # without them
                    continue
                vectors.append(vector)
                images.append(image)

        if use_detect_faces:
            print("Using only images with faces: total=%d, withfaces=%d" % (
                len(results), len(images)))
            if not images:
                print(
                    "No images contained faces :( Exiting and returning None's"
                )
                return None, None, None

        # create matrix and index
        matrix = np.array(vectors).reshape(-1, vectorization_dimensionality)
        index.add(matrix)

        # resize images to tiles
        if verbose:
            print("Resizing images to (%d, %d)..." % (height, width))

        tile_images = []
        for image in images:
            img = image.load_image()
            img_h, img_w, _ = img.shape
            # cv2.resize: fx scales the horizontal axis (width) and fy the
            # vertical axis (height)
            tile = cv2.resize(
                img, None,
                fx=width / float(img_w),
                fy=height / float(img_h),
                interpolation=cv2.INTER_AREA)
            tile_images.append(tile)

        if caching:
            print("Caching index to disk...")
            cache.save(matrix, images, tile_images)

        return index, images, tile_images

    except Exception:
        # Best-effort contract: report the failure and hand back None's.
        # No debugger breakpoint here - it would block non-interactive runs.
        import traceback
        print(traceback.format_exc())
        return None, None, None
# Repos to create/update, sorted by environment. repo_objects_create = [] repo_objects_update = {} for env in all_envs: repo_objects_update[env] = [] # All repo defs as Repo objects all_repos = [JuicerRepo(repo['name'], repo_def=repo) for repo in repo_defs] # Detailed information on all existing repos existing_repos = {} repo_pool = ThreadPool() # Parallelize getting the repo lists env_results = [repo_pool.apply_async(self.list_repos, tuple(), kwds={'envs': [er]}, callback=existing_repos.update) for er in all_envs] repo_pool.close() for result_async in env_results: result_async.wait() repo_pool.join() for repo in all_repos: # 'env' is all environments if: 'env' is not defined; 'env' is an empty list current_env = repo.get('env', []) if current_env == []: juicer.utils.Log.log_debug("Setting 'env' to all_envs for repo: %s" % repo['name']) repo['env'] = all_envs # Assemble a set of all specified environments. defined_envs = juicer.utils.unique_repo_def_envs(all_repos)
class Scheduler(MooseObject):
    """
    Base class for handling jobs asynchronously.

    To use this class, call .schedule() and supply a list of testers to schedule. Each
    group of testers supplied will begin running immediately.

    Syntax:
       .schedule([list of tester objects])

    A list of testers will be added to a queue and begin calling their derived run method.
    You can continue to add more testers to the queue in this fashion.

    Once you schedule all the testers you wish to test, call .waitFinish() to wait until
    all testers have finished.

    Two thread pools are used: `run_pool` executes runWorker() for each job and
    `status_pool` (a single thread) executes statusWorker() so statuses are handed to the
    harness one at a time.
    """

    @staticmethod
    def validParams():
        """ Return the parameters understood by this scheduler ('average_load', 'max_processes') """
        params = MooseObject.validParams()
        params.addRequiredParam('average_load', 64.0, "Average load to allow")
        params.addRequiredParam('max_processes', None, "Hard limit of maxium processes to use")
        return params

    # This is what will be checked for when we look for valid schedulers
    IS_SCHEDULER = True

    def __init__(self, harness, params):
        """
        Store the harness and its options, size the thread pools and initialize all
        bookkeeping (locks, slot counters, job counters).
        """
        MooseObject.__init__(self, harness, params)

        ## The test harness to run callbacks on
        self.harness = harness

        # Retrieve and store the TestHarness options for use in this object
        self.options = harness.getOptions()

        # The Scheduler class can be initialized with no "max_processes" argument and it'll default
        # to a soft limit. If however a max_processes is passed we'll treat it as a hard limit.
        # The difference is whether or not we allow single jobs to exceed the number of slots.
        if params['max_processes'] is None:
            self.available_slots = 1
            self.soft_limit = True
        else:
            self.available_slots = params['max_processes']  # hard limit
            self.soft_limit = False

        # Requested average load level to stay below
        self.average_load = params['average_load']

        # The time the status queue reported no activity to the TestHarness
        self.last_reported = clock()

        # A set containing jobs that have been reported
        self.jobs_reported = set()

        # Initialize run_pool based on available slots
        self.run_pool = ThreadPool(processes=self.available_slots)

        # Initialize status_pool to only use 1 process (to prevent status messages from getting clobbered)
        self.status_pool = ThreadPool(processes=1)

        # Slot Lock when processing resource allocations
        self.slot_lock = threading.Lock()

        # DAG Lock when processing the DAG
        self.dag_lock = threading.Lock()

        # Workers in use (single job might request multiple slots)
        self.slots_in_use = 0

        # Jobs waiting to finish (includes actively running jobs)
        self.job_queue_count = 0

        # Set containing our Job containers. We use this in the event of a KeyboardInterrupt to
        # iterate over and kill any subprocesses
        self.tester_datas = set()

    @staticmethod
    def _poolIsRunning(pool):
        """ Return True while the supplied thread pool still accepts new work. """
        # The pool state constants were integers (RUN == 0) before Python 3.8 and are strings
        # ("RUN") since, so a truthiness test on pool._state is not portable. Comparing
        # against the module's own RUN constant works on every version.
        from multiprocessing.pool import RUN
        return pool._state == RUN

    def killRemaining(self):
        """
        Method to kill any running subprocess started by the Scheduler. This also
        closes the status pool to prevent further statuses from printing to the
        screen.
        """
        self.run_pool.close()
        self.status_pool.close()

        for tester_data in self.tester_datas:
            tester_data.killProcess()

        # Release anything blocked in waitFinish()
        self.job_queue_count = 0

    def reportSkipped(self, jobs):
        """
        Allow derived schedulers to do something with skipped jobs
        """
        return

    def preLaunch(self, job_dag):
        """
        Allow derived schedulers to modify the DAG before jobs are launched
        """
        return

    def run(self, job_container):
        """ Call derived run method """
        return

    def postRun(self, job_container):
        """
        Allow derived schedulers to perform post run methods on job
        """
        return

    def cleanUp(self):
        """ Allow derived schedulers to perform cleanup operations """
        return

    def notifyFinishedSchedulers(self):
        """ Notify derived schedulers we are finished """
        return

    def skipPrereqs(self):
        """
        Method to return boolean to skip dependency prerequisites checks.
        """
        ignored = self.options.ignored_caveats
        if ignored and ('all' in ignored or 'prereq' in ignored):
            return True
        return False

    def processDownstreamTests(self, job_container):
        """
        Method to discover and delete downstream jobs due to supplied job failing.

        Returns the set of downstream job containers that were removed from the DAG; each
        one has its tester status set to 'skipped dependency'.
        """
        with self.dag_lock:
            failed_job_containers = set()
            tester = job_container.getTester()
            job_dag = job_container.getDAG()

            # Parentheses spell out the precedence of the original and/or chain
            job_failed = (tester.isFinished()
                          and not tester.didPass()
                          and not tester.isSilent()
                          and not self.skipPrereqs()
                          and not tester.isQueued())
            dry_run = self.options.dry_run and not tester.isSilent()

            if job_failed or dry_run:
                # Ask the DAG to delete and return the downstream jobs associated with this job
                failed_job_containers.update(job_dag.delete_downstreams(job_container))

            for failed_job in failed_job_containers:
                failed_tester = failed_job.getTester()
                failed_tester.setStatus('skipped dependency', failed_tester.bucket_skip)

        return failed_job_containers

    def buildDAG(self, job_container_dict, job_dag):
        """
        Build the DAG and catch any failures.

        job_container_dict maps tester names to job containers. Returns the set of testers
        that can not run (not runnable, bad/unknown/cyclic dependency, or output file race).
        """
        failed_or_skipped_testers = set()

        # Create DAG independent nodes
        for job_container in job_container_dict.values():
            tester = job_container.getTester()

            # Only runnable testers become nodes; the rest are reported as failed/skipped
            if tester.getRunnable(self.options):
                job_dag.add_node_if_not_exists(job_container)
            else:
                failed_or_skipped_testers.add(tester)

        # Create edge nodes
        for job_container in job_container_dict.values():
            tester = job_container.getTester()

            # Add the prereq node and edges
            for prereq in tester.getPrereqs():
                try:
                    # Try to produce a KeyError and capture an unknown dependency
                    prereq_job = job_container_dict[prereq]

                    # Try to produce either a cyclic or skipped dependency error using the DAG's
                    # built-in exception methods
                    job_dag.add_edge(prereq_job, job_container)

                # Skipped Dependencies
                except dag.DAGEdgeIndError:
                    if not self.skipPrereqs():
                        if self.options.reg_exp:
                            tester.setStatus('dependency does not match re', tester.bucket_skip)
                        else:
                            tester.setStatus('skipped dependency', tester.bucket_skip)

                        failed_or_skipped_testers.add(tester)

                    # Add the parent node / dependency edge to create a functional DAG now that we have caught
                    # the skipped dependency (needed for discovering race conditions later on)
                    job_dag.add_node_if_not_exists(job_container_dict[prereq])
                    job_dag.add_edge(job_container_dict[prereq], job_container)

                # Cyclic Failure
                except dag.DAGValidationError:
                    tester.setStatus('Cyclic or Invalid Dependency Detected!', tester.bucket_fail)
                    failed_or_skipped_testers.add(tester)

                # Unknown Dependency Failure
                except KeyError:
                    tester.setStatus('unknown dependency', tester.bucket_fail)
                    failed_or_skipped_testers.add(tester)

                # Skipped/Silent/Deleted Testers fall into this category, caused by 'job_container' being skipped
                # during the first iteration above
                except dag.DAGEdgeDepError:
                    pass

        # With a working DAG created above (even a partial one), discover race conditions with remaining runnable
        # testers.
        failed_or_skipped_testers.update(self.checkRaceConditions(job_dag))

        return failed_or_skipped_testers

    def checkRaceConditions(self, dag_object):
        """
        Return a set of failing testers exhibiting race conditions with their
        output file.
        """
        failed_or_skipped_testers = set()

        # clone the dag so we can operate destructively on the cloned dag
        dag_clone = dag_object.clone()

        while dag_clone.size():
            output_files_in_dir = set()

            # Get a list of concurrent job containers
            concurrent_jobs = dag_clone.ind_nodes()

            for job_container in concurrent_jobs:
                tester = job_container.getTester()
                output_files = tester.getOutputFiles()

                # check if we have colliding output files
                if output_files_in_dir.intersection(output_files):

                    # Fail this concurrent group of testers
                    # NOTE(review): the bucket is read from the colliding tester rather than from
                    # each failed tester; presumably bucket_fail is the same for all - confirm.
                    for this_job in concurrent_jobs:
                        failed_tester = this_job.getTester()
                        failed_tester.setStatus('OUTFILE RACE CONDITION', tester.bucket_fail)
                        failed_or_skipped_testers.add(failed_tester)

                    # collisions detected, move on to the next set
                    break

                output_files_in_dir.update(output_files)

            # Delete this group of job containers and allow the loop to continue
            for job_container in concurrent_jobs:
                dag_clone.delete_node(job_container)

        return failed_or_skipped_testers

    def schedule(self, testers):
        """
        Schedule supplied list of testers for execution.

        Raises SchedulerError if the number of runnable plus skipped jobs does not match the
        number of testers supplied.
        """
        # If any threads caused an exception, we have already closed down the queue and need to
        # not schedule any more jobs
        if not self._poolIsRunning(self.run_pool):
            return

        # Instance the DAG class so we can share it amongst all the Job containers
        job_dag = dag.DAG()

        non_runnable_jobs = set()
        name_to_job_container = {}

        # Increment our simple queue count with the number of testers the scheduler received
        with self.slot_lock:
            self.job_queue_count += len(testers)

        # Create a local dictionary of tester names to job containers. Add this dictionary to a
        # set. We will use this set as a way to gain access to their methods.
        for tester in testers:
            job_container = Job(tester, job_dag, self.options)
            name_to_job_container[tester.getTestName()] = job_container
            self.tester_datas.add(job_container)

        # Populate job_dag with testers. This method will also return any testers which caused failures
        # while building the DAG.
        skipped_or_failed_testers = self.buildDAG(name_to_job_container, job_dag)

        # Create a set of failing job containers
        for failed_tester in skipped_or_failed_testers:
            non_runnable_jobs.add(name_to_job_container[failed_tester.getTestName()])

        # Iterate over the jobs in our non_runnable_jobs and handle any downstream jobs affected by
        # 'job'. These will be our 'skipped dependency' tests. Iterate over a copy because the
        # set grows inside the loop.
        for job in non_runnable_jobs.copy():
            additionally_skipped = self.processDownstreamTests(job)
            non_runnable_jobs.update(additionally_skipped)
            job_dag.delete_node_if_exists(job)

        # Get a count of all the items still in the DAG. These will be the jobs that ultimately are queued
        runnable_jobs = job_dag.size()

        # Make sure we didn't drop a tester somehow
        if len(non_runnable_jobs) + runnable_jobs != len(testers):
            raise SchedulerError('Runnable tests in addition to Skipped tests does not match total scheduled test count!')

        # Inform derived schedulers of the jobs we are skipping immediately
        self.reportSkipped(non_runnable_jobs)

        # Assign a status thread to begin work on any skipped/failed jobs
        self.queueJobs(status_jobs=non_runnable_jobs)

        # Build our list of runnable jobs and set the tester's status to queued
        job_list = []
        if runnable_jobs:
            # Allow derived schedulers to modify the dag before we launch
            # TODO: We don't like this, and this will change when we move to better DAG handling.
            self.preLaunch(job_dag)

            job_list = job_dag.ind_nodes()
            for job_container in job_list:
                tester = job_container.getTester()
                tester.setStatus('QUEUED', tester.bucket_pending)

        # Queue runnable jobs
        self.queueJobs(run_jobs=job_list)

    def waitFinish(self):
        """
        Block while the job queue is not empty. Once empty, this method will begin closing down
        the thread pools and perform a join. Once the last thread exits, we return from this
        method.

        There are two thread pools in play; the Tester pool which is performing all the tests,
        and the Status pool which is handling the printing of tester statuses. Because the
        Status pool will always have the last item needing to be 'printed', we close and join
        the Tester pool first, and then we do the same to the Status pool.
        """
        while self.job_queue_count > 0:
            sleep(0.5)

        self.run_pool.close()
        self.run_pool.join()
        self.status_pool.close()
        self.status_pool.join()

        # Notify derived schedulers we are exiting
        self.notifyFinishedSchedulers()

    def handleLongRunningJobs(self, job_container):
        """ Handle jobs that have not reported in allotted time """
        if job_container not in self.jobs_reported:
            tester = job_container.getTester()
            tester.setStatus('RUNNING...', tester.bucket_pending)
            self.queueJobs(status_jobs=[job_container])

            # Restart the reporting timer for this job
            job_container.report_timer = threading.Timer(float(tester.getMinReportTime()),
                                                         self.handleLongRunningJobs,
                                                         (job_container,))
            job_container.report_timer.start()

    def handleTimeoutJobs(self, job_container):
        """ Handle jobs that have timed out """
        tester = job_container.getTester()
        tester.setStatus('TIMEOUT', tester.bucket_fail)
        job_container.killProcess()

    def getLoad(self):
        """ Method to return current load average (0.0 when the platform can not report it) """
        try:
            return os.getloadavg()[0]
        except AttributeError:
            # getloadavg() not available in this implementation of os
            return 0.0

    def satisfyLoad(self):
        """ Method for controlling load average """
        # Never block when at most one slot is in use, so at least one job can make progress
        while self.slots_in_use > 1 and self.getLoad() >= self.average_load:
            sleep(1.0)

    def reserveSlots(self, job_container):
        """
        Method which allocates resources to perform the job. Returns bool if job
        should be allowed to run based on available resources.
        """
        tester = job_container.getTester()

        # comply with load average
        if self.options.load:
            self.satisfyLoad()

        with self.slot_lock:
            requested = job_container.getProcessors()
            can_run = False

            if self.slots_in_use + requested <= self.available_slots:
                can_run = True

            # Check for insufficient slots -soft limit
            # TODO: Create a unit test for this case
            elif requested > self.available_slots and self.soft_limit:
                tester.specs.addParam('caveats', ['OVERSIZED'], "")
                can_run = True

            # Check for insufficient slots -hard limit (skip this job)
            # TODO: Create a unit test for this case
            elif requested > self.available_slots and not self.soft_limit:
                tester.setStatus('insufficient slots', tester.bucket_skip)

            if can_run:
                self.slots_in_use += requested

        return can_run

    def getNextJobGroup(self, job_dag):
        """
        Prepare and return a list of concurrent runnable jobs

        Acquires dag_lock, so it must not be called while the caller already holds that lock.
        """
        with self.dag_lock:
            next_job_list = []

            # Get concurrent available job list
            for job_container in job_dag.ind_nodes():
                tester = job_container.getTester()

                # Verify this job is not already running/pending/skipped
                if tester.isInitialized():
                    # Set this next new job to pending so as to prevent this job from being launched a second time
                    tester.setStatus('QUEUED', tester.bucket_pending)
                    next_job_list.append(job_container)

        return next_job_list

    def queueJobs(self, status_jobs=(), run_jobs=()):
        """
        Method to control which thread pool jobs enter.
        Syntax:

           To have a job(s) display its current status to the screen:
           .queueJobs(status_jobs=[job_container_list]

           To begin running job(s):
           .queueJobs(run_jobs=[job_container_list]

        Jobs are silently dropped once the corresponding pool has been closed.
        """
        # The pool state is re-checked per job because another thread may close the pool
        # (see killRemaining) while we are still iterating.
        for job_container in run_jobs:
            if self._poolIsRunning(self.run_pool):
                self.run_pool.apply_async(self.runWorker, (job_container,))

        for job_container in status_jobs:
            if self._poolIsRunning(self.status_pool):
                self.status_pool.apply_async(self.statusWorker, (job_container,))

    def statusWorker(self, job_container):
        """ Method the status_pool calls when an available thread becomes ready """
        # Wrap entire statusWorker thread inside a try/exception to catch thread errors
        try:
            tester = job_container.getTester()

            # If the job is still running for a long period of time and we have not reported
            # this same job already, report it now.
            if tester.isPending():
                overdue = clock() - self.last_reported >= float(tester.getMinReportTime())
                if overdue and job_container not in self.jobs_reported:
                    # Inform the TestHarness of a long running test (RUNNING...)
                    self.harness.handleTestStatus(job_container)

                    # ...And then set the finished caveat now that the running status has printed
                    tester.specs.addParam('caveats', ['FINISHED'], "")

                    # Add this job to the reported container so it does not happen again
                    self.jobs_reported.add(job_container)

                # Job is 'Pending', but is under the threshold to be reported (return now so
                # last_reported time does not get updated). This will ensure that if nothing
                # has happened between 'now' and another occurrence of our thread timer event
                # we do report it.
                else:
                    return

            else:
                # All other statuses are sent unmolested
                self.harness.handleTestStatus(job_container)

                # Decrement the job queue count now that this job has finished
                if tester.isFinished():
                    with self.slot_lock:
                        self.job_queue_count -= 1

            # Record current reported time only if it is an activity the user will see
            # NOTE(review): with 'or' the timestamp is only withheld when the tester is both
            # silent and deleted - confirm 'and' was not intended.
            if not tester.isSilent() or not tester.isDeleted():
                self.last_reported = clock()

        except Exception as e:
            print('statusWorker Exception: %s' % (e))
            self.killRemaining()

    def runWorker(self, job_container):
        """ Method the run_pool calls when an available thread becomes ready """
        # Wrap the entire runWorker thread inside a try/exception to catch thread errors
        try:
            tester = job_container.getTester()

            # Check if there are enough resources to run this job
            if self.reserveSlots(job_container):
                # Long running (report) timer and timeout timer
                job_container.report_timer = threading.Timer(float(tester.getMinReportTime()),
                                                             self.handleLongRunningJobs,
                                                             (job_container,))
                timeout_timer = threading.Timer(float(tester.getMaxTime()),
                                                self.handleTimeoutJobs,
                                                (job_container,))

                try:
                    job_container.report_timer.start()
                    timeout_timer.start()

                    # Call the derived run method (blocking)
                    self.run(job_container)
                finally:
                    # Always stop the timers, even when run() raises. Otherwise the report timer
                    # keeps re-arming itself and the timeout timer fires on a dead job.
                    job_container.report_timer.cancel()
                    timeout_timer.cancel()

                # Derived run needs to set a non-pending status of some sort.
                if tester.isPending():
                    raise SchedulerError('Derived Scheduler %s can not return a pending status!' % (self.__class__))

                # Determine if this job creates any skipped dependencies (if it failed), and send
                # this new list of jobs to the status queue to be printed.
                possibly_skipped_job_containers = self.processDownstreamTests(job_container)
                possibly_skipped_job_containers.add(job_container)
                self.queueJobs(status_jobs=possibly_skipped_job_containers)

                # Delete this job from the shared DAG while the DAG is locked
                with self.dag_lock:
                    job_dag = job_container.getDAG()
                    job_dag.delete_node(job_container)

                # Get next job list (takes dag_lock itself, so call it outside the block above)
                next_job_group = self.getNextJobGroup(job_dag)

                # Allow derived schedulers to perform post run operations
                self.postRun(job_container)

                # Recover worker count before attempting to queue more jobs
                with self.slot_lock:
                    self.slots_in_use = max(0, self.slots_in_use - job_container.getProcessors())

                # Queue this new batch of runnable jobs
                self.queueJobs(run_jobs=next_job_group)

            # There will never be enough slots to run this job (insufficient slots)
            elif tester.isFinished():
                failed_downstream = self.processDownstreamTests(job_container)
                failed_downstream.add(job_container)
                self.queueJobs(status_jobs=failed_downstream)

            # There are no available slots, currently. Place back in queue, and sleep for a bit
            else:
                self.queueJobs(run_jobs=[job_container])
                sleep(0.3)

        except Exception as e:
            print('runWorker Exception: %s' % (e))
            self.killRemaining()
def build_cache_maps(context, configurations, region, installed_region):
    """Build a giant cache of instances, volumes, snapshots for region.

    Args:
        context: passed through to ebs_snapper.timeout_check to decide when to stop early.
        configurations: snapshot configurations; each valid one carries a 'match' section
            that is converted into EC2 describe_instances filters.
        region: region name used for the EC2 client and for snapshot lookups.
        installed_region: not used by this function.

    Returns:
        dict with the keys 'instance_id_to_data', 'instance_id_to_config',
        'volume_id_to_instance_id', 'snapshot_id_to_data',
        'volume_id_to_snapshot_count' and 'volume_id_to_most_recent_snapshot_date'.
        All maps are empty when there are no configurations.
    """
    LOG.info("Building cache of instance, volume, and snapshots in %s", region)
    LOG.info("This may take a while...")
    cache_data = {
        # calculated here locally
        'instance_id_to_data': {},
        'instance_id_to_config': {},
        'volume_id_to_instance_id': {},

        # calculated w/ multiprocessing module
        'snapshot_id_to_data': {},
        'volume_id_to_snapshot_count': {},
        'volume_id_to_most_recent_snapshot_date': {},
    }

    # build an EC2 client, we're going to need it
    ec2 = boto3.client('ec2', region_name=region)

    if not configurations:
        LOG.info('No configurations found in %s, not building cache', region)
        return cache_data

    # populate them
    LOG.info("Retrieved %s DynamoDB configurations for caching", len(configurations))

    # build a list of any IDs (anywhere) that we should ignore
    ignore_ids = build_ignore_list(configurations)

    for config in configurations:
        # stop if we're running out of time
        if ebs_snapper.timeout_check(context, 'build_cache_maps'):
            break

        # if it's missing the match section, ignore it
        if not validate_snapshot_settings(config):
            continue

        # build a boto3 filter to describe instances with
        configuration_matches = config['match']
        filters = convert_configurations_to_boto_filter(configuration_matches)

        # if we ended up with no boto3 filters, we bail so we don't snapshot everything
        if not filters:
            LOG.warning('Could not convert configuration match to a filter: %s',
                        configuration_matches)
            continue

        filters.append({'Name': 'instance-state-name', 'Values': ['running', 'stopped']})
        instances = ec2.describe_instances(Filters=filters)
        res_list = instances.get('Reservations', [])
        random.shuffle(res_list)  # attempt to randomize order, for timeouts

        for reservation in res_list:
            inst_list = reservation.get('Instances', [])
            random.shuffle(inst_list)  # attempt to randomize order, for timeouts

            for instance_data in inst_list:
                instance_id = instance_data['InstanceId']

                # skip if we're ignoring this
                if instance_id in ignore_ids:
                    continue

                cache_data['instance_id_to_config'][instance_id] = config
                cache_data['instance_id_to_data'][instance_id] = instance_data

                for dev in instance_data.get('BlockDeviceMappings', []):
                    vid = dev['Ebs']['VolumeId']

                    # skip if we're ignoring this
                    if vid in ignore_ids:
                        continue

                    cache_data['volume_id_to_instance_id'][vid] = instance_id

    LOG.info("Retrieved %s instances for caching", len(cache_data['instance_id_to_data']))

    # look at each volume, get snapshots and count / most recent, and map to instance
    # (list() instead of .keys()[:] so this also works where keys() returns a view)
    process_volumes = list(cache_data['volume_id_to_instance_id'])
    LOG.info("Retrieved %s volumes for caching", len(process_volumes))

    # split the volume ids into work lists of at most 25 ids each
    chunk_size = 25
    chunked_work = [
        process_volumes[start:start + chunk_size]
        for start in range(0, len(process_volumes), chunk_size)
    ]

    LOG.debug('Split out volume work into %s lists, pulling snapshots...', len(chunked_work))

    if chunked_work:
        f = functools.partial(chunk_volume_work, region)

        pool = ThreadPool(processes=4)
        try:
            results = pool.map(f, chunked_work)
        finally:
            # release the worker threads even if one of the chunks raised
            pool.close()
            pool.join()

        keys = [
            'volume_id_to_most_recent_snapshot_date',
            'volume_id_to_snapshot_count',
            'snapshot_id_to_data',
        ]
        for result_chunk in results:
            for k in keys:
                cache_data[k].update(result_chunk[k])

    LOG.info("Retrieved %s snapshots for caching", len(cache_data['snapshot_id_to_data']))

    return cache_data