def fetch_item_info(session, observations, claims, verbose=False):
    """
    Fetches information about wikidata items.

    :Parameters:
        session : :class:`mwapi.Session`
            An API session to use for querying
        observations : `iterable` ( `dict` )
            A collection of observations to annotate
        claims : `list` ( `str` )
            A set of property names to look up claims for
        verbose : `bool`
            Print dots and stuff

    :Returns:
        An `iterator` of observations augmented with an `autolabel` field
        containing the requested information.  Note that observations that
        can't be found will be excluded.
    """
    batches = chunkify(observations, 25)
    executor = ThreadPoolExecutor(max_workers=4)
    _fetch_item_info = build_fetch_item_info(session, claims)
    for annotated_batch in executor.map(_fetch_item_info, batches):
        for annotated_item in annotated_batch:
            yield annotated_item
            if verbose:
                sys.stderr.write(".")
                sys.stderr.flush()

    if verbose:
        sys.stderr.write("\n")
def _connect_rover_fired(self):
    """Handle callback for HTTP rover connections.
    """
    if not self.device_uid:
        msg = "\nDevice ID not found!\n\nConnection requires a valid Piksi device ID."
        self._prompt_setting_error(msg)
        return
    if not self.http:
        self._prompt_networking_error("\nNetworking disabled!")
        return
    try:
        _base_pragma = self.base_pragma
        if not self.http.connect_write(self.link, self.whitelist,
                                       pragma=_base_pragma):
            msg = ("\nUnable to connect to Skylark!\n\n"
                   "Please check that you have a network connection.")
            self._prompt_networking_error(msg)
            self.http.close()
            self.connected_rover = False
            return
        self.connected_rover = True
        print "Connected as a base station!"
        executor = ThreadPoolExecutor(max_workers=2)
        executor.submit(self._retry_read)
    except:
        self.connected_rover = False
        import traceback
        print traceback.format_exc()
def search(rootTitle, destTitle, session, maxDepth=-1):
    '''
    Given the title of a wikipedia page, find how many 'hops' it takes
    to get to the given destination page. Uses the breadth first search
    algorithm.
    '''
    visited = set()
    consumerQueue = queue.Queue(MAX_CONSUMER_QUEUE_SIZE)
    consumerQueue.put(page.Page(rootTitle, session, 0))
    producerQueue = queue.Queue()
    executor = ThreadPoolExecutor(2)

    # Start thread to consume pages
    consumerArgs = [destTitle, consumerQueue, producerQueue, visited]
    consumerFuture = executor.submit(consumer, *consumerArgs)

    # Start thread to produce child pages and add them to consumer queue
    producerArgs = [consumerQueue, producerQueue, session]
    executor.submit(producer, *producerArgs)

    return consumerFuture.result()
def main():
    # Update configuration from the local file
    from .configuration import Setting
    Setting.read_cfg_from_file()

    # Print instance information
    print("Node name: {0}\nNode address: {1}".format(Setting.get_node_name(),
                                                     Setting.get_node_addr()))

    # Reset data in the database
    from .meta_storage import MetaStorage
    meta_storage = MetaStorage()
    meta_storage.drop_database()
    meta_storage.close_connection()
    print("Clear data in the database complete.")

    # Reset data in the local storage
    from general.services import Services
    if not Services.is_folder_exist(Setting.get_local_storage()):
        Services.t_print(Setting.get_local_storage() + " does not exist! (Local Storage).")

    # Get file from the folder
    import glob
    import os
    files = glob.glob(Setting.get_local_storage() + "*")
    for file in files:
        os.remove(file)
    print("Clear {0} files in the local storage complete.".format(len(files)))

    # Create a thread for running REST service
    from concurrent.futures import ThreadPoolExecutor
    pool = ThreadPoolExecutor()
    pool.submit(run_rest_service)
def run_test_program(test_cases: List[TestCase], program: str,
                     get_flags: Callable[[str], List[str]]) -> List[Result]:
    """
    Run the program and return a list of results.
    """
    def run(test_case):
        test_dir, test_name = os.path.split(test_case.file_path)
        flags = get_flags(test_dir)
        test_flags = get_test_flags(test_case.file_path)
        cmd = [program]
        if test_case.input is None:
            cmd.append(test_name)
        cmd += flags + test_flags
        if verbose:
            print('Executing', ' '.join(cmd))
        try:
            output = subprocess.check_output(
                cmd, stderr=subprocess.STDOUT, cwd=test_dir,
                universal_newlines=True, input=test_case.input)
        except subprocess.CalledProcessError as e:
            # we don't care about nonzero exit codes... for instance, type
            # errors cause hh_single_type_check to produce them
            output = e.output
        return check_result(test_case, output)

    executor = ThreadPoolExecutor(max_workers=max_workers)
    futures = [executor.submit(run, test_case) for test_case in test_cases]
    return [future.result() for future in futures]
def txmake_all():
    executor = ThreadPoolExecutor(4)
    futures = []

    for node_type, attr in _texture_attrs:
        for node in cmds.ls(type=node_type) or ():
            src = cmds.getAttr(node + '.' + attr).strip()
            if not src:
                continue
            if src.endswith('.tex'):
                dst = src
                src = dst.rsplit('.', 1)[0]
            else:
                dst = src + '.tex'
            if not os.path.exists(src):
                print 'MISSING TEXTURE from %s.%s %s: %s' % (node_type, attr, node, src)
                continue
            if os.path.exists(dst) and os.path.getmtime(src) <= os.path.getmtime(dst):
                print 'Skipping up-to-date %s.%s %s: %s' % (node_type, attr, node, dst)
                future = None
            else:
                print 'Txmaking %s.%s %s: %s' % (node_type, attr, node, src)
                future = executor.submit(txmake, src, dst, newer=False)
            futures.append((future, node, attr, dst))

    for future, node, attr, dst in futures:
        if future:
            future.result()  # Wait for it
        cmds.setAttr(node + '.' + attr, dst, type='string')
def main(cmd, args):
    logging.basicConfig(
        level=logging.INFO,
        format=("%(relativeCreated)04d %(process)05d %(threadName)-10s "
                "%(levelname)-5s %(msg)s"))
    pool = Pool(max_workers=1)
    arg1 = tmp_fname
    arg2 = 'arg2'
    stage_file_name = cmd
    if (glob_stage == 1):
        f = pool.submit(check_output,
                        ["ec-perl", "-w", stage_file_name, args, arg2],
                        shell=True)
        print "stage 1 started. Initiate call to " + stage_file_name
        print "Stage is " + str(glob_stage)
    else:
        f = pool.submit(check_output,
                        ["ec-perl", stage_file_name, " ", args, arg2],
                        shell=True)
        print "stage 2 started. Initiate call to " + stage_file_name + " " + args + " " + arg2 + "'"
        print "Stage is " + str(glob_stage)
    f.add_done_callback(callback)
    pool.shutdown(wait=False)
def main():
    db.fixkeys(key_utils.to_ipv6)
    parser = argparse.ArgumentParser(description='Submit nodes and links to fc00')
    parser.add_argument('-v', '--verbose',
                        help='increase output verbosity',
                        dest='verbose',
                        action='store_true')
    parser.set_defaults(verbose=False)
    args = parser.parse_args()

    con = connect()
    nodes = dump_node_store(con)
    edges = {}

    get_peer_queue = queue.Queue(0)
    result_queue = queue.Queue(0)

    e = ThreadPoolExecutor(max_workers=4)

    def args():
        for ip, node in nodes.items():
            yield ip, keyFromAddr(node['addr']), node['path'], node['version']
    args = zip(*args())

    dbnodes = {}
    for peers, node_id, ip in e.map(get_peers_derp, *args):
        get_edges_for_peers(edges, peers, node_id)
        addpeersto(dbnodes, node_id, ip, peers)
        for ip, id in peers:
            addpeersto(dbnodes, id, ip)
    print('otay!')
    send_graph(dbnodes, edges)
    sys.exit(0)
def _request(self, src_lang, tgt_lang, src_texts):
    """
    Description:
        Receive src_texts, which should be a list of texts to be translated.
        The _request method calls the _basic_request method for each HTTP
        request and assembles the JSON dictionaries returned by _basic_request.
        For the case where _basic_request needs to be called multiple times,
        the concurrent.futures package is adopted to run the requests in
        concurrent threads.
    Return Value:
        String object.
    """
    executor = ThreadPoolExecutor(max_workers=len(src_texts))
    threads = []
    for src_text in src_texts:
        future = executor.submit(
            self._basic_request,
            src_lang,
            tgt_lang,
            src_text,
        )
        threads.append(future)

    # check whether all threads finished or not.
    # con_success = self._check_threads(threads)
    # if not con_success:
    #     return "Please check your site"

    if threads[0].result() is None:
        return "Error in network!"

    merged_text = self._merge_text(
        [future.result() for future in threads],
    )
    return merged_text
def handler(event, contest): logger.info("Start!") executor = ThreadPoolExecutor(max_workers=1000) main_loop = asyncio.new_event_loop() main_loop.set_default_executor(executor) asyncio.set_event_loop(main_loop) poll = Poll(main_loop) cal = poll.cal update_and_delete = UpdateAndDelete(main_loop, executor) table = event['table'] queue_url = event['queueUrl'] message_count = event['messageCount'] poll.messages(sqs, queue_url, message_count) logger.info("Receive API count: {}".format(poll.fetch_count)) logger.info("Fetched messages: {}".format(poll.message_count)) update_and_delete.execute(sqs_client, db, queue_url, table, cal.stats) logger.info("Update API count: {}".format(update_and_delete.update_count)) logger.info("Delete API count: {}".format(update_and_delete.delete_count)) logger.info("Delete Message count: {}".format( update_and_delete.deleted_message_count)) main_loop.close() executor.shutdown() return "Lambda job finished successfully."
def play(av, n):
    proxy_list = get_proxy(n)
    executor = ThreadPoolExecutor(max_workers=n)
    play_video_av = partial(play_video_1, av=av)
    for data in executor.map(play_video_av, proxy_list):
        print("in main: {} success".format(data))
class ThreadPool:
    def __init__(self, workers=1):
        self._jobs = []
        self._pool = ThreadPoolExecutor(workers)

    def _finish(self, job):
        try:
            self._jobs.remove(job)
        except ValueError:
            pass
        try:
            e = job.exception()
            if e:
                raise e
        except Exception as e:
            error(str(e), exc_info=True)

    def schedule(self, func):
        job = self._pool.submit(func)
        job.add_done_callback(self._finish)
        self._jobs.append(job)

    def shutdown(self):
        for job in as_completed(self._jobs):
            job.result()
        self._pool.shutdown()
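# A minimal usage sketch for the ThreadPool wrapper above.  The work function
# `refresh_cache` is hypothetical; the point is the schedule()/shutdown()
# lifecycle, where shutdown() drains every submitted job before closing the pool.
def refresh_cache():
    print("cache refreshed")

pool = ThreadPool(workers=4)
for _ in range(8):
    pool.schedule(refresh_cache)
pool.shutdown()  # waits for all jobs, then shuts the underlying executor down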
def __init__(self):
    settings_manager = SettingsManager()  # Set up the settings_manager
    max_workers = settings_manager.getint('application', 'max-workers')  # Get the max workers from settings manager
    profiler_on = settings_manager.getint('debugging', 'profiler-on')  # Get whether there is a profiler
    absolute = settings_manager.getint('save', 'absolute')  # Get whether it's an absolute path
    save_path = settings_manager.get('save', 'path')  # Get the save path
    if not absolute:
        save_path = PROJECT_PATH + os.path.sep + save_path

    executor = ThreadPoolExecutor(max_workers=max_workers, profiler_on=profiler_on)  # Set up the thread executor
    dis = Disassembler(settings_manager)  # Build the disassembler
    server = PyDAServer('0.0.0.0', 9000)  # Set up the PyDA server
    save_manager = SaveManager(save_path)

    if profiler_on:
        profile = Profile()
        profile.enable()

    app.build_and_run(settings_manager, dis, executor, server, save_manager)  # Run the interface

    if profiler_on:
        profile.disable()
        stats = executor.getProfileStats()
        if stats is None:
            stats = Stats(profile)
        else:
            stats.add(profile)
        with open('profile.stats', 'wb') as statsfile:
            stats.stream = statsfile
            stats.sort_stats('cumulative').print_stats()
def run_test_program(files, program, expect_ext, get_flags):
    """
    Run the program and return a list of Failures.
    """
    def run(f):
        test_dir, test_name = os.path.split(f)
        flags = get_flags(test_dir)
        cmd = [program, test_name] + flags
        if verbose:
            print('Executing', ' '.join(cmd))
        try:
            output = subprocess.check_output(
                cmd, stderr=subprocess.STDOUT, cwd=test_dir,
                universal_newlines=True)
        except subprocess.CalledProcessError as e:
            # we don't care about nonzero exit codes... for instance, type
            # errors cause hh_single_type_check to produce them
            output = e.output
        return check_result(f, expect_ext, output)

    executor = ThreadPoolExecutor(max_workers=max_workers)
    futures = [executor.submit(run, f) for f in files]
    results = [f.result() for f in futures]
    return [r for r in results if r is not None]
class ModulePool:
    def __init__(self, workers=1):
        self._jobs = {}
        self._pool = ThreadPoolExecutor(workers)

    def _launch(self, func, hook, dependencies):
        if dependencies:
            for dependency in dependencies:
                while True:
                    if dependency in self._jobs.keys():
                        self._jobs[dependency].result()
                        break
                    else:
                        sleep(0.1)
        func(hook)

    def schedule(self, func, hook):
        innerfunc, args, module, hookname, dependencies = hook
        job = self._pool.submit(self._launch, func, hook, dependencies)
        self._jobs[module] = job

    def shutdown(self):
        for job in as_completed([self._jobs[j] for j in self._jobs]):
            job.result()
        self._pool.shutdown()
class AsyncClient(object): """Client which uses the base to be more performant. This client uses Futures with a ThreadPoolExecutor. This allows requests to be executed asynchronously. Asynchronous execution with multiple Clients enables requests to be processed in parallel and with pipeline execution at the server, which can drastically improve achievable interoperability rate as observed at the client. Note that methods return Future objects. Users should handle the response and errors appropriately. If serial request execution is desired, ensure the Future response or error is received prior to making another request. """ def __init__(self, url, username, password, timeout=1): """Create a new AsyncClient and login. Args: url: Base URL of interoperability server (e.g., http://localhost:8000) username: Interoperability username password: Interoperability password timeout: Individual session request timeout (seconds) """ self.client = Client(url, username, password, timeout) self.server_info_executor = ThreadPoolExecutor(max_workers=1) self.uas_telemetry_executor = ThreadPoolExecutor(max_workers=1) self.obstacles_executor = ThreadPoolExecutor(max_workers=1) def get_server_info(self): """GET server information, to be displayed to judges. Returns: Future object which contains the return value or error from the underlying Client. """ return self.server_info_executor.submit(self.client.get_server_info) def post_telemetry(self, telem): """POST new telemetry. Args: telem: Telemetry object containing telemetry state. Returns: Future object which contains the return value or error from the underlying Client. """ return self.uas_telemetry_executor.submit(self.client.post_telemetry, telem) def get_obstacles(self): """GET obstacles. Returns: Future object which contains the return value or error from the underlying Client. """ return self.obstacles_executor.submit(self.client.get_obstacles)
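# A minimal usage sketch for the AsyncClient above.  The URL and credentials
# are placeholders; the point is that each call returns a concurrent.futures
# Future, and errors from the underlying Client only surface when .result()
# is called, as the class docstring warns.
client = AsyncClient('http://localhost:8000', 'testuser', 'testpass')

info_future = client.get_server_info()
obstacles_future = client.get_obstacles()

print(info_future.result())        # blocks until the GET completes
print(obstacles_future.result())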
def _getResults(runIDs, output_handler, benchmark):
    executor = ThreadPoolExecutor(MAX_SUBMISSION_THREADS)

    while len(runIDs) > 0:
        start = time()
        runIDsFutures = {}
        failedRuns = []
        for runID in runIDs:
            state = _isFinished(runID)
            if state == "FINISHED" or state == "UNKOWN":
                run = runIDs[runID]
                future = executor.submit(_getAndHandleResult, runID, run,
                                         output_handler, benchmark)
                runIDsFutures[future] = runID
            elif state == "ERROR":
                failedRuns.append(runID)

        # remove all finished runs from _unfinished_run_ids
        for future in as_completed(runIDsFutures.keys()):
            if future.result():
                del runIDs[runIDsFutures[future]]
                _unfinished_run_ids.remove(runIDsFutures[future])

        # remove failed runs from _unfinished_run_ids
        for runID in failedRuns:
            _unfinished_run_ids.remove(runID)
            del runIDs[runID]

        end = time()
        duration = end - start
        if duration < 5:
            sleep(5 - duration)
def test(no_workers, pipeline, input, state, run_function=run_pipeline):
    executor = ThreadPoolExecutor(max_workers=no_workers)
    try:
        result = run_function(executor, pipeline, input, state)
    finally:
        executor.shutdown(True)
    return result
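# The try/finally above guarantees executor.shutdown() even when run_function
# raises.  A sketch of the equivalent context-manager form, assuming the
# executor is not needed after the with-block; the exit of the with-block
# performs the same shutdown(wait=True).
def test_with_context_manager(no_workers, pipeline, input, state,
                              run_function=run_pipeline):
    with ThreadPoolExecutor(max_workers=no_workers) as executor:
        return run_function(executor, pipeline, input, state)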
class HttpThreadpool(object):
    def __init__(self, max_workers=10, queue_size=200):
        self.executor = ThreadPoolExecutor(max_workers, queue_size)

    @retry(max_tries=3)
    def _download(self, url):
        req = urllib2.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36')
        rsp = urllib2.urlopen(req, timeout=30)
        return rsp.read()

    def download_and_process(self, url, body_process):
        return self.executor.submit(self._download_and_process, url, body_process)

    def _download_and_process(self, url, body_process):
        body_func, body_args, body_kw = body_process
        body = self._download(url)
        try:
            body_func(body, *body_args, **body_kw)
        except Exception as e:
            print url, traceback.format_exc()

    def shutdown(self):
        self.executor.shutdown()
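# A small usage sketch for HttpThreadpool above.  It assumes the executor
# variant the class relies on (one that accepts a queue size) is available,
# and `save_body` is a hypothetical handler; the point is the
# (callable, args, kwargs) tuple that download_and_process expects as
# its body_process argument.
def save_body(body, name, mode='wb'):
    with open(name, mode) as f:
        f.write(body)

pool = HttpThreadpool(max_workers=4)
future = pool.download_and_process(
    'http://example.com/', (save_body, ('example.html',), {'mode': 'wb'}))
future.result()   # wait until the download and the handler have run
pool.shutdown()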
def main(): parser = argparse.ArgumentParser() parser.add_argument("shader", nargs='*', default=['shaders'], metavar="<shader_file | shader dir>", help="A shader file or directory containing shader " "files. Defaults to 'shaders/'") args = parser.parse_args() os.environ["shader_precompile"] = "true" os.environ["allow_glsl_extension_directive_midshader"] = "true" if "INTEL_DEBUG" in os.environ: print("Warning: INTEL_DEBUG environment variable set!", file=sys.stderr) os.environ["INTEL_DEBUG"] += ",vs,gs,fs" else: os.environ["INTEL_DEBUG"] = "vs,gs,fs" try: os.stat("bin/glslparsertest") except OSError: print("./bin must be a symlink to a built piglit bin directory") sys.exit(1) runtimebefore = time.time() filenames = process_directories(args.shader) executor = ThreadPoolExecutor(cpu_count()) for t in executor.map(run_test, filenames): sys.stdout.write(t) runtime = time.time() - runtimebefore print("shader-db run completed in {:.1f} secs".format(runtime))
class ElasticDataSink: def __init__(self, name, conn, model_identifier, workers=5, bound=10000): self.name = name self.conn = conn self.model_identifier = model_identifier self.queue = Queue(maxsize=bound) self.pool = ThreadPoolExecutor(max_workers=workers) def start(self): self.model_identifier.model_class.init(using=self.conn) def __sink_item(self): try: item = self.queue.get_nowait() save_status = item.save(using=self.conn) if not save_status: logger.error("Error saving the item to the sink") else: logger.info("item saved to the sink") except Empty as e: logger.warn("sink queue is empty") logger.warn(e) def sink_item(self, item): assert isinstance(item, self.model_identifier.model_class), \ " item must be instance of " + self.model_identifier.model_class try: self.queue.put(item, timeout=10) self.pool.submit(self.__sink_item) except Full as e: logger.error("sink queue is full") logger.error(e)
def time_sosfilt(self, n_samples, threads):
    pool = ThreadPoolExecutor(max_workers=threads)
    futures = []
    for i in range(threads):
        futures.append(pool.submit(sosfilt, self.filt, self.chunks[i]))

    wait(futures)
def start(self, s=0, ms=50, event_executor=None): """Start the main loop for the context in a create thread, and then return. :param int s: timeout value (seconds). Passed to :meth:`event_wait` in the main loop. :param int ms: timeout value (seconds). Passed to :meth:`event_wait` in the main loop. :param concurrent.futures.Executor event_executor: Event executor instance. Events will be fired in it. Default is a :class:`concurrent.futures.ThreadPoolExecutor` instance :return: New created event loop thread. :rtype: threading.Thread This method returns soon after the main loop thread started, so it **does not block**. Equal to set :attr:`is_running` to `True` """ self.logger.info('<0x%x>start: >>> s=%s, ms=%s', id(self), s, ms) if self._is_running: raise RuntimeError("Context loop already started.") if event_executor: self._event_executor = event_executor else: try: self._event_executor = ThreadPoolExecutor() except TypeError: # Changed in version 3.5: If max_workers is None or not given, it will default to the number of processors on the machine, multiplied by 5 self._event_executor = ThreadPoolExecutor(cpu_count() * 5) self._event_loop_thread = threading.Thread( target=self._event_loop, args=(s, ms)) self._start_cond.acquire() self._event_loop_thread.start() self._start_cond.wait() self._start_cond.release() self.logger.info('<0x%x>start: <<< -> %s', id(self), self._event_loop_thread) return self._event_loop_thread
def upload_file(upload_file_name, temp_file_name='encoded.csv', split_file_format="{orig_file}_{id}.{orig_ext}", parent_folder_id='0B46HJMu9Db4xTUxhQ0x4WHpfVmM'): file_name = os.path.basename(upload_file_name) # Encode file. base64.encode(open(upload_file_name), open(temp_file_name, 'w+')) # Split file. num_split_files, file_names = splitfile(temp_file_name, SPLIT_SIZE, split_file_format) # Start upload threads. start = time.time() file_id = uuid.uuid1() thread_pool = ThreadPoolExecutor(max_workers=MAX_DOWNLOADS) for i in range(num_split_files): current_file_name = file_names[i] up_t = upload_worker.UploadWorker(index=i + 1, file_id=file_id, filename=file_name, parent_folder_id=parent_folder_id, total_file_num=num_split_files, upload_file_name=current_file_name) future = thread_pool.submit(up_t.run) # Wait for completion. thread_pool.shutdown() end = time.time() m, s = divmod(end - start, 60) print "Overall time taken: ", m, "m ", s, "s" return file_id
class LoaferRunner: def __init__(self, loop=None, max_workers=None, on_stop_callback=None): self._on_stop_callback = on_stop_callback self.loop = loop or asyncio.get_event_loop() # XXX: See https://github.com/python/asyncio/issues/258 # The minimum value depends on the number of cores in the machine # See https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor self._executor = ThreadPoolExecutor(max_workers) self.loop.set_default_executor(self._executor) def start(self, future=None, run_forever=True): start = 'starting Loafer, pid={}, run_forever={}' logger.info(start.format(os.getpid(), run_forever)) self.loop.add_signal_handler(signal.SIGINT, self.stop) self.loop.add_signal_handler(signal.SIGTERM, self.stop) try: if run_forever: self.loop.run_forever() else: self.loop.run_until_complete(future) self.stop() except CancelledError: self.loop.close() def stop(self, *args, **kwargs): logger.info('stopping Loafer ...') if callable(self._on_stop_callback): self._on_stop_callback() self._executor.shutdown(wait=True) if self.loop.is_running(): self.loop.stop()
def start_client(self): if self.simulate_sensor is True: from stemlab_client.sensors.dht22_sensor_simulator import DHT22Sensor else: from stemlab_client.sensors.dht22_sensor import DHT22Sensor try: exit_monitor = GracefulExit() self._load_persistent_data() if self._device_settings is None: self._setup_device() shelve_db = shelve.open(self._db_filename, writeback=True) shelve_db[self.SETTINGS_KEY] = self._device_settings shelve_db.close() sensor = DHT22Sensor(self._device_settings.device_id, units=FAHRENHEIT) executor = ThreadPoolExecutor(4) next_reading = time.time() while True: readings = sensor.poll() for reading in readings: template = self._device_settings.measurement_templates[reading.measurement_type.name] post_data = generate_measurement_params(template, reading) post_future = executor.submit(post, post_data['href'], post_data['params']) post_future.add_done_callback(callback_reading_post) next_reading += self.poll_interval time.sleep(next_reading - time.time()) if exit_monitor.exit_now is True: break except Exception as e: print traceback.format_exc() print str(e)
class RemoteHelloConsumer(object): def __init__(self): self._helloservice = None self._name = "Python" self._msg = "Hello Java" self._executor = ThreadPoolExecutor() @Validate def _validate(self, bundle_context): # call it! resp = self._helloservice.sayHello(self._name + "Sync", self._msg) print( "{0} IHello service consumer received sync response: {1}".format( self._name, resp ) ) # call sayHelloAsync which returns Future and we add lambda to print # the result when done self._executor.submit( self._helloservice.sayHelloAsync, self._name + "Async", self._msg ).add_done_callback( lambda f: print("async response: {0}".format(f.result())) ) print("done with sayHelloAsync method") # call sayHelloAsync which returns Future and we add lambda to print # the result when done self._executor.submit( self._helloservice.sayHelloPromise, self._name + "Promise", self._msg, ).add_done_callback( lambda f: print("promise response: {0}".format(f.result())) ) print("done with sayHelloPromise method")
def handler(event, contest): logger.info("Start!") executor = ThreadPoolExecutor(max_workers=100) cal = Sum() queue_url = event['queueUrl'] message_count = event['messageCount'] queue = sqs.Queue(queue_url) num_of_calls = message_count // batch_count queues = [] for i in range(num_of_calls): queues.append(queue) message_count = 0 responses = executor.map(one_request, queues) for response in responses: message_count += len(response) for msg in response: cal.add(msg) logger.info("Receive API count: {}".format(num_of_calls)) logger.info("Fetched messages: {}".format(message_count)) executor.shutdown()
class BaseDataLayer(Layer): def setup(self, bottom, top): param = eval(self.param_str_) self.batch_size_ = param['batch_size'] self.data_setup(bottom, top) top[0].reshape(*self.data_.shape) self.executor_ = ThreadPoolExecutor(max_workers=1) self.thread_ = self.executor_.submit(self.internal_thread_entry) def reshape(self, bottom, top): pass def forward(self, bottom, top): self.thread_.result() top[0].reshape(*self.data_.shape) top[0].data[...] = self.data_ self.thread_ = self.executor_.submit(self.internal_thread_entry) def data_setup(self, bottom, top): raise NotImplementedError() def internal_thread_entry(self): raise NotImplementedError() def __del__(self): self.thread_.result() self.executor_.shutdown() super(self.__class__, self).__del__()
class ScheduledIOPool(Thread):
    """Schedule events to an IO worker pool.
    """
    def __init__(self, workers, delay):
        super(ScheduledIOPool, self).__init__()
        self.scheduler = sched.scheduler(time.time, time.sleep)
        self.thread_pool = ThreadPoolExecutor(max_workers=workers)
        self.delay = delay
        # Private flag so it does not shadow the shutdown() method below
        self._shutdown = False
        self.daemon = True

    def shutdown(self):
        self._shutdown = True  # TODO shutdown

    def run(self):
        while not self._shutdown:
            try:
                self.scheduler.run()
                time.sleep(.1)  # TODO: no wait/notify when queue is empty
            except:  # TODO log
                print "Unexpected error scheduling IO:"
                traceback.print_exc()
                time.sleep(.1)
        self.thread_pool.shutdown()

    def cancel(self, event):
        return self.scheduler.cancel(event)

    def schedule(self, *args):
        return self.scheduler.enter(self.delay, 1, self.thread_pool.submit, args)
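# A minimal usage sketch for ScheduledIOPool above (Python 2 style, matching
# the class).  `write_record` is a hypothetical IO callable; each schedule()
# call enqueues it so the scheduler submits it to the worker pool after the
# configured delay.
def write_record(record):
    print "writing", record

io_pool = ScheduledIOPool(workers=2, delay=5)
io_pool.start()                      # runs the sched loop in its own thread
event = io_pool.schedule(write_record, {'id': 1})
# io_pool.cancel(event)              # a pending event can still be cancelled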
from flask import jsonify from flask_adminlte import AdminLTE from models import AdminUser, SimulationTask, ResultsPerDay, Base from concurrent.futures import ThreadPoolExecutor import sys import json from models import db_session, engine, Base from tasks import run_long_task from tasks import print_exception from sqlalchemy import func import logging import logging.config from config import g_logger #Only One Task per time executor = ThreadPoolExecutor(1) task_future = None #Main function contains multiple routes def create_rido_application(configfile=None): app = Flask(__name__) AdminLTE(app) current_user = AdminUser() @app.teardown_appcontext def shutdown_session(exception=None): db_session.remove() @app.before_first_request
from .text import Text from .writers import PILWriter def _to_inches(millimeters: float) -> float: return millimeters / 25.4 def _to_millimeters(inches: float) -> float: return inches * 25.4 logger = logging.getLogger(__name__) loop = asyncio.get_event_loop() thread_executor = ThreadPoolExecutor() class EmptyPageException(Exception): pass class Page: height_inches = FunctionBindDescriptor("height", _to_inches, _to_millimeters) width_inches = FunctionBindDescriptor("width", _to_inches, _to_millimeters) resolution = FunctionBindDescriptor("dpi", _to_inches, _to_millimeters) def __init__(self, height: int, width: int, dpi: int, columns=1) -> None: self.uuid = uuid4()
# res = pp.submit(get_page, i)
# ret.append(res)
# pp.shutdown()
# for i in ret:
#     print(i.result())

"""Submitting tasks with map"""
from concurrent.futures import ThreadPoolExecutor


def get_page(i):
    time.sleep(0.5)
    return i


pp = ThreadPoolExecutor(5)
t = pp.map(get_page, range(100))
pp.shutdown()
for i in t:
    print(i, )

""" Callback functions """

""" 9.6 Coroutines """

""" The gevent module """
print('Key not found!')
    sys.exit()

sys.stdout.write('Downloading %d file(s) from %s\n' % (len(titles), album_url))
sys.stdout.flush()


# Downloads and writes to file using title
def download(url, title):
    filename = title + '.' + url.split('.')[-1]
    files = os.listdir(folder)  # List files in folder
    if filename in files:  # Skip file if already downloaded/downloading
        return
    # Download file from url
    r = requests.get(r'https://' + url[2 - len(url):], stream=True)
    with open(folder + filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # While alive
                f.write(chunk)
                f.flush()


# Download the videos, using the configured number of workers
with ThreadPoolExecutor(max_workers=simultaneous_workers) as executor:
    for url, title in zip(videos, titles):
        print('Downloading ' + title + '...')
        executor.submit(download, url, title)

sys.stdout.write("\n%d Downloads Finished!" % (len(videos)))
email = tree.xpath('//*[@name="tfa_2"]')[0].items()[3][1] #Goes to XPATH location for phone on the form and stores the information phone = tree.xpath('//*[@name="tfa_94"]')[0].items()[3][1] #If a name was found then it appends all the data it found to the master list if name: master_list.append([id, name, email, phone]) print(f"Name: {name}") print(f"Email: {email}") print(f"Phone: {phone}") #Creates multiple threads so python can make multiple requests to the webpage processes = [] with ThreadPoolExecutor(max_workers=100) as executor: for url in url_list: processes.append(executor.submit(enumerate_form, url)) #Counter for iterating through excel cells counter = 1 #Iterates through all the students in the master list and writes them to excel for student in master_list: counter += 1 sheet['A' + str(counter)].value = student[0] sheet['B' + str(counter)].value = student[1] sheet['C' + str(counter)].value = student[2] sheet['D' + str(counter)].value = student[3] #Saves excel workbook
from threading import Thread
import time


def task(question, paragraph):
    s = Summarizer(paragraph, maxSumarySize=3)
    result = s.get_result()
    print(question, result)
    list1.append([question, result])


if __name__ == '__main__':
    start = time.time()
    list1 = []
    pool = ThreadPoolExecutor(20)
    for item in collection.find({"category": {"$gt": 0}}):
        ######## Single thread (serial) ##########
        # summar = Summarizer(paragraph, maxSumarySize=2)
        # result = summar.get_result()
        # print(result)

        ######## Starting multiple threads ###########
        # t = Thread(target=task, args=(item.get("question"), u"{}".format(item.get("answer"))))
        # t.start()
        # for i in range(759):
        #     t.join()
        # print(list1)
        # stop = time.time()
        # print(stop-start)

        ######## Using a thread pool to start threads ########
        pool.submit(task, item.get("question"),
def executor(self):
    """single global executor"""
    cls = self.__class__
    if cls._executor is None:
        cls._executor = ThreadPoolExecutor(1)
    return cls._executor
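# A sketch of the lazy class-level singleton pattern the accessor above
# implies, shown here as a property.  The enclosing class name and the
# `_executor = None` class attribute are assumptions; only the executor()
# body comes from the snippet.
from concurrent.futures import ThreadPoolExecutor


class BackgroundWorker(object):
    _executor = None  # shared across all instances

    @property
    def executor(self):
        """single global executor"""
        cls = self.__class__
        if cls._executor is None:
            cls._executor = ThreadPoolExecutor(1)
        return cls._executor


# Both instances reuse the same single-threaded executor.
a, b = BackgroundWorker(), BackgroundWorker()
assert a.executor is b.executor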
def test_worker_node_restart_during_pvc_clone(self, nodes, pvc_clone_factory, pod_factory): """ Verify PVC cloning will succeed if a worker node is restarted while cloning is in progress """ file_name = "fio_test" executor = ThreadPoolExecutor(max_workers=len(self.pvcs) + 1) selected_node = node.get_nodes(node_type=constants.WORKER_MACHINE, num_of_nodes=1) # Run IO log.info("Starting IO on all pods") for pod_obj in self.pods: storage_type = ("block" if pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK else "fs") pod_obj.run_io( storage_type=storage_type, size="1G", runtime=20, fio_filename=file_name, end_fsync=1, ) log.info(f"IO started on pod {pod_obj.name}") log.info("Started IO on all pods") # Wait for IO to finish log.info("Wait for IO to finish on pods") for pod_obj in self.pods: pod_obj.get_fio_results() log.info(f"IO finished on pod {pod_obj.name}") # Calculate md5sum file_name_pod = (file_name if (pod_obj.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM) else pod_obj.get_storage_path(storage_type="block")) pod_obj.pvc.md5sum = pod.cal_md5sum( pod_obj, file_name_pod, pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK, ) # Restart node log.info(f"Restart node {selected_node[0].name}") restart_thread = executor.submit(nodes.restart_nodes, nodes=selected_node) log.info("Creating clone of all PVCs.") for pvc_obj in self.pvcs: log.info(f"Creating clone of {pvc_obj.name}") pvc_obj.clone_proc = executor.submit(pvc_clone_factory, pvc_obj=pvc_obj, status="") # Check result of 'restart_nodes' restart_thread.result() log.info("Verify status of node.") node.wait_for_nodes_status( node_names=[node.get_node_name(selected_node[0])], status=constants.NODE_READY, timeout=300, ) # Get cloned PVCs cloned_pvcs = [pvc_obj.clone_proc.result() for pvc_obj in self.pvcs] log.info("Verifying cloned PVCs are Bound") for pvc_obj in cloned_pvcs: wait_for_resource_state(resource=pvc_obj, state=constants.STATUS_BOUND, timeout=540) pvc_obj.reload() log.info("Verified: Cloned PVCs are Bound") # Attach the cloned PVCs to pods log.info("Attach the cloned PVCs to pods") clone_pod_objs = [] for pvc_obj in cloned_pvcs: if pvc_obj.volume_mode == "Block": pod_dict_path = constants.CSI_RBD_RAW_BLOCK_POD_YAML else: pod_dict_path = "" clone_pod_obj = pod_factory( interface=pvc_obj.parent.interface, pvc=pvc_obj, status="", pod_dict_path=pod_dict_path, raw_block_pv=pvc_obj.volume_mode == "Block", ) log.info( f"Attaching the PVC {pvc_obj.name} to pod {clone_pod_obj.name}" ) clone_pod_objs.append(clone_pod_obj) # Verify the new pods are running log.info("Verify the new pods are running") for pod_obj in clone_pod_objs: wait_for_resource_state(pod_obj, constants.STATUS_RUNNING) log.info("Verified: New pods are running") # Verify md5sum for pod_obj in clone_pod_objs: file_name_pod = (pod_obj.get_storage_path(storage_type="block") if (pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK) else file_name) pod.verify_data_integrity( pod_obj, file_name_pod, pod_obj.pvc.parent.md5sum, pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK, ) log.info( f"Verified: md5sum of {file_name_pod} on pod {pod_obj.name} " f"matches with the original md5sum") log.info("Data integrity check passed on all pods") # Run IO log.info("Starting IO on the new pods") for pod_obj in clone_pod_objs: storage_type = ("block" if pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK else "fs") pod_obj.run_io( storage_type=storage_type, size="1G", runtime=20, fio_filename=f"{file_name}_1", end_fsync=1, ) log.info(f"IO started on pod {pod_obj.name}") 
log.info("Started IO on the new pods") # Wait for IO to finish log.info("Wait for IO to finish on the new pods") for pod_obj in clone_pod_objs: pod_obj.get_fio_results() log.info(f"IO finished on pod {pod_obj.name}") log.info("IO finished on the new pods")
def measure_memory(is_gpu, func): import os import psutil from time import sleep class MemoryMonitor: def __init__(self, keep_measuring=True): self.keep_measuring = keep_measuring def measure_cpu_usage(self): max_usage = 0 while True: max_usage = max( max_usage, psutil.Process(os.getpid()).memory_info().rss / 1024**2) sleep(0.005) # 5ms if not self.keep_measuring: break return max_usage def measure_gpu_usage(self): from py3nvml.py3nvml import nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex, \ nvmlDeviceGetMemoryInfo, nvmlDeviceGetName, nvmlShutdown, NVMLError max_gpu_usage = [] gpu_name = [] try: nvmlInit() deviceCount = nvmlDeviceGetCount() max_gpu_usage = [0 for i in range(deviceCount)] gpu_name = [ nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)) for i in range(deviceCount) ] while True: for i in range(deviceCount): info = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i)) max_gpu_usage[i] = max(max_gpu_usage[i], info.used / 1024**2) sleep(0.005) # 5ms if not self.keep_measuring: break nvmlShutdown() return [{ "device_id": i, "name": gpu_name[i], "max_used_MB": max_gpu_usage[i] } for i in range(deviceCount)] except NVMLError as error: if not self.silent: self.logger.error( "Error fetching GPU information using nvml: %s", error) return None monitor = MemoryMonitor(False) memory_before_test = monitor.measure_gpu_usage( ) if is_gpu else monitor.measure_cpu_usage() from concurrent.futures import ThreadPoolExecutor with ThreadPoolExecutor() as executor: monitor = MemoryMonitor() mem_thread = executor.submit( monitor.measure_gpu_usage if is_gpu else monitor.measure_cpu_usage) try: fn_thread = executor.submit(func) result = fn_thread.result() finally: monitor.keep_measuring = False max_usage = mem_thread.result() if is_gpu: print( f"GPU memory usage: before={memory_before_test} peak={max_usage}" ) if len(memory_before_test) >= 1 and len(max_usage) >= 1: before = memory_before_test[0]["max_used_MB"] after = max_usage[0]["max_used_MB"] return after - before else: return None else: print( f"CPU memory usage: before={memory_before_test:.1f} MB, peak={max_usage:.1f} MB" ) return max_usage - memory_before_test
import time
from concurrent.futures import ThreadPoolExecutor


def basic_func(x):
    if x == 0:
        return 'zero'
    elif x % 2 == 0:
        return 'even'
    else:
        return 'odd'


def multiprocessing_func(x):
    y = x * x
    time.sleep(2)
    print('{} squared results in a/an {} number'.format(x, basic_func(y)))


if __name__ == '__main__':
    starttime = time.time()
    ex = ThreadPoolExecutor(max_workers=10)
    results = ex.map(multiprocessing_func, range(0, 10))
    real_results = list(results)
    print('That took {} seconds'.format(time.time() - starttime))
def train(): parser = argparse.ArgumentParser( description="Acoustic model training script") pkwrap.script_utils.add_chain_recipe_opts(parser) # the idea behind a test config is that one can run different configurations of test parser.add_argument("--test-config", default="test", help="name of the test to be run") parser.add_argument("--decode-iter", default="final") parser.add_argument("--config", default="configs/default") args = parser.parse_args() logging.info("Reading config") cfg_parse = configparser.ConfigParser() cfg_parse.read(args.config) cmd = cfg_parse["cmd"] cpu_cmd = cmd['cpu_cmd'] cuda_cmd = cmd['cuda_cmd'] exp_cfg = cfg_parse["exp"] assert exp_cfg is not None stage = args.stage model_file = exp_cfg["model_file"] data = exp_cfg["data"] if "data" in exp_cfg else "data" exp = exp_cfg["exp"] if "exp" in exp_cfg else "exp" chain_affix = exp_cfg["chain_affix"] if "chain_affix" in exp_cfg else "" chain_dir = os.path.join(exp, f"chain{chain_affix}") dirname = os.path.join(chain_dir, exp_cfg["dirname"]) if not os.path.exists(dirname): os.makedirs(dirname) egs_dir = os.path.join(dirname, "egs") if "e2e" in exp_cfg: is_e2e = bool(exp_cfg["e2e"]) else: is_e2e = False if not is_e2e: gmm_dir = exp_cfg["gmm_dir"] ali_dir = exp_cfg["ali_dir"] lat_dir = exp_cfg["lat_dir"] lores_train_set = exp_cfg["lores_train_set"] tree_dir = exp_cfg["tree_dir"] train_set = exp_cfg["train_set"] lang = exp_cfg["lang"] if "lang" in exp_cfg else "lang" lang_chain = exp_cfg[ "lang_chain"] if "lang_chain" in exp_cfg else "lang_chain" l2_regularize = args.l2_regularize model_opts = pkwrap.trainer.ModelOpts().load_from_config(exp_cfg) frame_subsampling_factor = model_opts.frame_subsampling_factor trainer_opts = pkwrap.trainer.TrainerOpts().load_from_config(exp_cfg) # create lang folder if stage <= 0: logging.info("Creating lang_chain folder from lang") try: rc = subprocess.run([ "shutil/chain/check_lang.sh", lang, lang_chain, ]).returncode except Exception as e: # TODO: should use logging sys.stderr.write(e) sys.stderr.write("ERROR: copying lang failed") logging.info(f"Created {lang_chain} folder") # create lats if stage <= 1: logging.info("Create supervision lattices") try: subprocess.run([ "steps/align_fmllr_lats.sh", "--nj", "{}".format(args.decode_nj), "--cmd", "{}".format(cpu_cmd), lores_train_set, "{}".format(lang), gmm_dir, lat_dir, ]) except Exception as e: logging.error(e) logging.error("Lattice creationg failed") logging.info("Finished creating supervision lattices") # build tree if stage <= 2: tree_file = os.path.join(tree_dir, "tree") if os.path.isfile(tree_file): logging.error(f"Tree file {tree_file} already exists." 
" Refusing to overwrite".format(tree_file)) quit(1) tree_size = exp_cfg["tree_size"] if "tree_size" in exp_cfg else 7000 logging.info(f"Using tree_size={tree_size}") if not os.path.isfile(os.path.join(tree_dir, '.done')): cmd = [ "steps/nnet3/chain/build_tree.sh", "--frame-subsampling-factor", f"{frame_subsampling_factor}", '--context-opts', "--context-width=2 --central-position=1", "--cmd", "{}".format(cpu_cmd), tree_size, f"{lores_train_set}", f"{lang_chain}", f"{ali_dir}", f"{tree_dir}", ] pkwrap.script_utils.run(cmd) subprocess.run(["touch", "{}/.done".format(tree_dir)]) if not os.path.isdir(dirname): os.makedirs(dirname) logging.info(f"Created {dirname}") if not os.path.isfile(os.path.join(dirname, 'tree')): shutil.copy(os.path.join(tree_dir, "tree"), dirname) learning_rate_factor = 0.5 / args.xent_regularize # create den.fst if stage <= 3: logging.info("Creating den.fst") pkwrap.script_utils.run([ "shutil/chain/make_den_fst.sh", "--cmd", f"{cpu_cmd}", tree_dir, gmm_dir, dirname, ]) if not os.path.isfile(os.path.join(dirname, 'num_pdfs')): logging.info(f"Creating num_pdfs file in {dirname}") num_pdfs = subprocess.check_output([ "tree-info", os.path.join(tree_dir, "tree") ]).decode().strip().split('\n')[0].split()[1] with open(os.path.join(dirname, 'num_pdfs'), 'w') as opf: opf.write(num_pdfs) opf.close() if not os.path.isfile(os.path.join(dirname, "0.trans_mdl")): pkwrap.script_utils.run([ "copy-transition-model", os.path.join(tree_dir, "final.mdl"), os.path.join(dirname, "0.trans_mdl"), ]) # create or copy the egs folder context = None if stage <= 4 and not ("egs_dir" in exp_cfg and exp_cfg["egs_dir"]): logging.info("Creating egs") # first check the context process_out = subprocess.run([ model_file, "--mode", "context", "--dir", dirname, "0.pt", # use a dummy model. 
]) if process_out.returncode != 0: quit(process_out.returncode) with open(os.path.join(dirname, 'context')) as ipf: context = int(ipf.readline()) pkwrap.script_utils.run([ "steps/chain/get_egs.sh", "--cmd", cpu_cmd, "--cmvn-opts", "--norm-means=false --norm-vars=false", "--left-context", str(context), "--right-context", str(context), "--frame-subsampling-factor", str(frame_subsampling_factor), "--alignment-subsampling-factor", str(frame_subsampling_factor), "--frames-per-iter", str(trainer_opts.frames_per_iter), "--frames-per-eg", str(trainer_opts.chunk_width), "--srand", str(trainer_opts.srand), "--online-ivector-dir", trainer_opts.online_ivector_dir, train_set, dirname, lat_dir, egs_dir ]) elif "egs_dir" in exp_cfg: egs_dir = exp_cfg["egs_dir"] if not os.path.exists(os.path.join(dirname, 'context')): shutil.copy(os.path.join(egs_dir, 'info', 'left_context'), os.path.join(dirname, 'context')) if context is None: with open(os.path.join(dirname, 'context')) as ipf: context = int(ipf.readline()) model_opts.load_from_config({ 'left_context': context, 'right_context': context }) feat_dim_filename = os.path.join(dirname, "feat_dim") if not os.path.isfile(feat_dim_filename): # if ivector_dim is present in egs_dir, add that to feat_dim if os.path.isfile(os.path.join(egs_dir, 'info', 'ivector_dim')): feat_dim = 0 with open(os.path.join(egs_dir, "info", "feat_dim")) as ipf: feat_dim = int(ipf.readline().strip()) with open(os.path.join(egs_dir, "info", "ivector_dim")) as ipf: feat_dim += int(ipf.readline().strip()) with open(feat_dim_filename, 'w') as opf: opf.write('{}'.format(feat_dim)) opf.close() else: shutil.copy(os.path.join(egs_dir, "info", "feat_dim"), dirname) # we start training with num_archives = pkwrap.script_utils.get_egs_info(egs_dir) num_epochs = trainer_opts.num_epochs # we don't use num of jobs b/c it is 1 for now num_archives_to_process = num_archives * num_epochs * frame_subsampling_factor num_iters = (num_archives_to_process * 2) // ( trainer_opts.num_jobs_initial + trainer_opts.num_jobs_final) # TODO: for stages 5 and 6 (and possibly 7), use ChainTrainer # start the training if stage <= 5: logging.info("Initializing model") process_out = subprocess.run([ *cuda_cmd.split(), os.path.join(dirname, "log", "init.log"), model_file, "--mode", "init", "--dir", dirname, os.path.join(dirname, "0.pt") ]) if process_out.returncode != 0: quit(process_out.returncode) if stage <= 6: train_stage = trainer_opts.train_stage logging.info(f"Starting training from stage={train_stage}") assert train_stage >= 0 num_archives_processed = 0 for iter_no in range(0, num_iters): num_jobs = pkwrap.script_utils.get_current_num_jobs( iter_no, num_iters, trainer_opts.num_jobs_initial, 1, # we don't play with num-jobs-step trainer_opts.num_jobs_final) if iter_no < train_stage: num_archives_processed += num_jobs continue assert num_jobs > 0 lr = pkwrap.script_utils.get_learning_rate( iter_no, num_jobs, num_iters, num_archives_processed, num_archives_to_process, trainer_opts.lr_initial, trainer_opts.lr_final, schedule_type='exponential') diagnostic_job_pool = None if iter_no % trainer_opts.diagnostics_interval == 0: diagnostic_job_pool = submit_diagnostic_jobs( dirname, model_file, iter_no, egs_dir, cuda_cmd, ivector_dir=trainer_opts.online_ivector_dir) logging.info( "{} Running iter={} of {} with {} jobs and lr={:.6f}".format( datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), iter_no, num_iters, num_jobs, lr)) with ThreadPoolExecutor(max_workers=num_jobs) as executor: job_pool = [] for job_id in range(1, 
num_jobs + 1): frame_shift = num_archives_processed % frame_subsampling_factor p = executor.submit( run_job, num_jobs, job_id, dirname, iter_no, model_file, lr, frame_shift, egs_dir, num_archives, num_archives_processed, exp_cfg["minibatch_size"], cuda_cmd, ivector_dir=trainer_opts.online_ivector_dir) num_archives_processed += 1 job_pool.append(p) for p in as_completed(job_pool): if p.result() != 0: quit(p.result()) if num_jobs > 1: model_list = [ os.path.join(dirname, "{}.{}.pt".format(iter_no, job_id)) for job_id in range(1, num_jobs + 1) ] process_out = subprocess.run([ *cuda_cmd.split(), "{}/log/merge.{}.log".format(dirname, iter_no + 1), model_file, "--dir", dirname, "--mode", "merge", "--new-model", os.path.join(dirname, "{}.pt".format(iter_no + 1)), ",".join(model_list) ]) for mdl in model_list: pkwrap.script_utils.run(["rm", mdl]) else: pkwrap.script_utils.run([ "mv", os.path.join(dirname, "{}.1.pt".format(iter_no)), os.path.join(dirname, "{}.pt".format(iter_no + 1)), ]) # remove old model if iter_no >= 20 and (iter_no - 10) % trainer_opts.checkpoint_interval != 0: mdl = os.path.join(dirname, "{}.pt".format(iter_no - 10)) if os.path.isfile(mdl): pkwrap.script_utils.run(["rm", mdl]) # do final model combination model_list = [ os.path.join(dirname, f"{i}.pt") for i in range(num_iters, num_iters - 10, -1) ] logging.info("Final model combination...") diagnostic_name = 'valid' egs_file = os.path.join(egs_dir, '{}_diagnostic.cegs'.format(diagnostic_name)) ivector_opts = [] if trainer_opts.online_ivector_dir: ivector_opts = ["--use-ivector", "True"] pkwrap.script_utils.run([ *cuda_cmd.split(), "{}/log/combine.log".format(dirname), model_file, "--dir", dirname, "--mode", "final_combination", "--new-model", os.path.join(dirname, "final.pt"), "--egs", "ark:{}".format(egs_file), *ivector_opts, ",".join(model_list) ]) graph_dir = "" decode_params = cfg_parse[args.test_config] if "graph_dir" in exp_cfg: graph_dir = exp_cfg["graph_dir"] if "graph_dir" in decode_params: graph_dir = decode_params["graph_dir"] if not graph_dir: graph_dir = os.path.join(dirname, 'graph') if stage <= 7: if not os.path.isfile(os.path.join(graph_dir, 'HCLG.fst')): logging.info("Making graph with {}".format(exp_cfg["lang"])) pkwrap.script_utils.run([ 'utils/mkgraph.sh', '--self-loop-scale', '1.0', exp_cfg["lang"], tree_dir, graph_dir ]) if stage <= 8: final_iter = num_iters - 1 data_dir = decode_params["test_set"] data_name = os.path.basename(data_dir) decode_iter = decode_params[ "iter"] if "iter" in decode_params else "final" decode_affix = decode_params[ "suffix"] if "suffix" in decode_params else "" decode_suff = "_iter{}{}".format(decode_iter, decode_affix) out_dir = os.path.join(dirname, f"decode_{data_name}{decode_suff}") graph = "{}/HCLG.fst".format(graph_dir) if "num_jobs" in decode_params: num_jobs = pkwrap.utils.split_data( data_dir, int(decode_params["num_jobs"]), ) else: num_jobs = pkwrap.utils.split_data(data_dir) logging.info(f"Decoding with {num_jobs} jobs...") ivector_opts = [] if "ivector_dir" in decode_params and len( decode_params["ivector_dir"]) > 0: ivector_opts = ["--ivector-dir", decode_params["ivector_dir"]] if "apply_cmvn" in decode_params and bool(decode_params["apply_cmvn"]): use_cmvn = True cmvn_opts = decode_params["cmvn_opts"] utt2spk_name = "ark:{}/split{}/JOB/utt2spk".format( data_dir, num_jobs) feats_name = "scp:{}/split{}/JOB/feats.scp".format( data_dir, num_jobs) cmvn_name = "scp:{}/split{}/JOB/cmvn.scp".format( data_dir, num_jobs) feats_scp = "ark,s,cs:apply-cmvn {} --utt2spk={} {} 
{} ark:- |".format( cmvn_opts, utt2spk_name, cmvn_name, feats_name) else: feats_scp = "scp:{}/split{}/JOB/feats.scp".format( data_dir, num_jobs) pkwrap.script_utils.run([ *cpu_cmd.split(), "JOB=1:{}".format(num_jobs), os.path.join(out_dir, "log", "decode.JOB.log"), model_file, "--dir", dirname, "--mode", "decode", *ivector_opts, "--decode-feats", feats_scp, os.path.join(dirname, "{}.pt".format(decode_iter)), "|", "shutil/decode/latgen-faster-mapped.sh", os.path.join(graph_dir, "words.txt"), os.path.join(dirname, "0.trans_mdl"), graph, os.path.join(out_dir, "lat.JOB.gz") ]) opf = open(os.path.join(out_dir, 'num_jobs'), 'w') opf.write('{}'.format(num_jobs)) opf.close() logging.info(f"Scoring...") if not os.path.isfile(os.path.join(out_dir, '../final.mdl')) and \ os.path.isfile(os.path.join(out_dir, '../0.trans_mdl')): pkwrap.script_utils.run([ "ln", "-r", "-s", os.path.join(out_dir, '../0.trans_mdl'), os.path.join(out_dir, '../final.mdl'), ]) pkwrap.script_utils.run( ["local/score.sh", "--cmd", cpu_cmd, data_dir, graph_dir, out_dir]) logging.info(f"Printing best WER...") pkwrap.script_utils.run(" ".join( ["cat", "{}/wer*".format(out_dir), "|", "utils/best_wer.sh"]), shell=True)
def exec(self):
    pool = ThreadPoolExecutor(10)
    host_list = self.get_host_list()
    for host in host_list:
        pool.submit(self.task, host['hostname'])
def __init__(self, bound, max_workers):
    self.executor = ThreadPoolExecutor(max_workers=max_workers)
    self.semaphore = BoundedSemaphore(bound + max_workers)
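# The __init__ above pairs an executor with a BoundedSemaphore, the usual
# ingredients of a "bounded submit" wrapper that blocks callers once
# bound + max_workers jobs are in flight.  The submit/shutdown methods below
# are a sketch of that pattern, not the original class's code.
from concurrent.futures import ThreadPoolExecutor
from threading import BoundedSemaphore


class BoundedExecutor(object):
    def __init__(self, bound, max_workers):
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        self.semaphore = BoundedSemaphore(bound + max_workers)

    def submit(self, fn, *args, **kwargs):
        self.semaphore.acquire()          # blocks when too many jobs are queued
        try:
            future = self.executor.submit(fn, *args, **kwargs)
        except Exception:
            self.semaphore.release()
            raise
        future.add_done_callback(lambda _: self.semaphore.release())
        return future

    def shutdown(self, wait=True):
        self.executor.shutdown(wait)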
if auto_position: # we think we know about other bars (currently only py3 threading) if n == 6: tqdm.write("n == 6 completed") if sys.version_info[:1] > (2,): progresser_thread = partial(progresser, auto_position=True) else: progresser_thread = progresser if __name__ == '__main__': freeze_support() # for Windows support print("Multi-processing") p = Pool(len(L), initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) p.map(progresser, L) # unfortunately need ncols # to print spaces over leftover multi-processing bars (#796) with tqdm(leave=False) as t: ncols = t.ncols or 80 print(("{msg:<{ncols}}").format(msg="Multi-threading", ncols=ncols)) with ThreadPoolExecutor(4) as p: p.map(progresser_thread, L) print("Manual nesting") for i in trange(16, desc="1"): for _ in trange(16, desc="2 @ %d" % i, leave=i % 2): sleep(0.01)
def __init__(self, evaluator, n_worker=1):
    self.evaluator = evaluator
    self.n_worker = n_worker
    self.thread_pool = ThreadPoolExecutor(max_workers=n_worker)
def ta(q):
    p = "demo"
    print("Trying password " + str(q))
    kv = {
        "username": p,
        "password": q,
        "encodedPassword": "",
        # "captcha": "ceems",
        "message": ""
    }
    try:
        r = requests.post(url, kv, headers=headers, allow_redirects=False)
        print(r.status_code)
        if r.status_code == 302:
            print("The password is " + q)
            with open("用户名.txt", "a") as f:  # "username.txt"
                f.write(p + "\t" + q + "\n")
            exit()
    except:
        exit()


pool = ThreadPoolExecutor(10)
for index in lt_11:
    pool.submit(ta, index)
print("end")
class docTypeHandler(baseHandler): executor = ThreadPoolExecutor(50) # 起线程池,由当前RequestHandler持有 # @tornado.web.asynchronous @tornado.gen.coroutine def post(self): logging.info( '#####################################################################' ) logging.info('Document Type - Start : ' + time.asctime(time.localtime(time.time()))) logging.info( '#####################################################################' ) try: if self.request.headers.get("Content-Type") == "application/json": # logging.info(self.request.body) reqData = json.loads(self.request.body) signRslt = self.verify_sign() if signRslt['success']: try: imgBase64 = reqData['image'] api_id = reqData['api_id'] _docType, _confidence = yield self._docTypePredict( imgBase64) _rslt = {} _rslt['docType'] = _docType _rslt['confidence'] = _confidence self.write_json(data=_rslt) except KeyError as e: self.write_json( data={}, ret=10005, msg='Invalid JSON format, missing api_id/image') except Exception as e: self.write_json(data={}, ret=10002, msg=str(e)) else: self.write_json(data={}, ret=10003, msg=signRslt['msg']) else: self.write_json(data={}, ret=10004, msg='Content-Type need to be application/json') except Exception as e: self.write_json(data={}, ret=10005, msg=str(e)) @run_on_executor def _docTypePredict(self, imgBase64): # Convert the base64 to PIL image __img = readImage(imgBase64, outFormat='PIL') __imgGrey = __img.convert('L') # Get Doc Type by running predict model docType, confidence = _docClass.predict(__imgGrey) return docType, confidence
class EnvironmentHandler(tornado.web.RequestHandler): executor = ThreadPoolExecutor(30) @tornado.web.asynchronous @tornado.gen.coroutine def get(self, APIName): yield self.execute_get(APIName) @tornado.web.asynchronous @tornado.gen.coroutine def post(self, APIName): yield self.execute_post(APIName) @run_on_executor def execute_get(self, APIName): dataResult = DataResult() try: tasks = { 'getEnvironmentInfoById': lambda: self.getEnvironmentInfoById(), 'getEnvironmentInfos': lambda: self.getEnvironmentInfos(), 'getEnvironmentInfoByUserId': lambda: self.getEnvironmentInfoByUserId() # lambda alias } self.write(json.dumps(tasks[APIName]().__dict__, cls=CJsonEncoder)) except: logger.error(traceback.format_exc()) dataResult.setMessage(traceback.format_exc()) dataResult.setSuccess(False) dataResult.setStatusCode(500) self.write(json.dumps(dataResult.__dict__)) finally: try: self.finish() except: pass @run_on_executor def execute_post(self, APIName): dataResult = DataResult() try: tasks = { 'addEnvironmentItem': lambda: self.addEnvironmentItem(), 'deleteEnvironmentItem': lambda: self.deleteEnvironmentItem(), 'editEnvironmentItem': lambda: self.editEnvironmentItem() } self.write(json.dumps(tasks[APIName]().__dict__, cls=CJsonEncoder)) except: logger.error(traceback.format_exc()) dataResult.setMessage(traceback.format_exc()) dataResult.setSuccess(False) dataResult.setStatusCode(500) self.write(json.dumps(dataResult.__dict__)) finally: try: self.finish() except: pass def getEnvironmentInfoById(self): envId = self.get_argument('envId') return EnvironmentService().getEnvironmentInfoById(envId) @AdminDecoratorServer.webInterceptorDecorator(SystemConfig.adminHost) def addEnvironmentItem(self): return EnvironmentService().addEnvironmentItem( json.loads(self.request.body)) @AdminDecoratorServer.webInterceptorDecorator(SystemConfig.adminHost) def deleteEnvironmentItem(self): return EnvironmentService().deleteEnvironmentItem( json.loads(self.request.body)) def editEnvironmentItem(self): return EnvironmentService().editEnvironmentItem( json.loads(self.request.body)) def getEnvironmentInfos(self): return EnvironmentService().getEnvironmentInfos() def getEnvironmentInfoByUserId(self): useId = self.get_argument('userId') return EnvironmentService().getEnvironmentInfosByUserId(useId)
def __new__(cls, *args, **kwargs):
    if not getattr(cls, '_instance', None):
        cls._instance = ThreadPoolExecutor(max_workers=10)
    return cls._instance
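# A sketch of how the __new__ above is typically wrapped: a singleton class
# whose every "instance" is actually one shared ThreadPoolExecutor.  The
# class name below is an assumption; only the __new__ body is from the snippet.
from concurrent.futures import ThreadPoolExecutor


class SharedExecutor(object):
    def __new__(cls, *args, **kwargs):
        if not getattr(cls, '_instance', None):
            cls._instance = ThreadPoolExecutor(max_workers=10)
        return cls._instance


# Every construction returns the same executor object.
assert SharedExecutor() is SharedExecutor()
SharedExecutor().submit(print, "runs on the shared pool")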
def compare_results(f_block, l_block, url1, url2, max_tries=10, timeout=0.1): global wdir global errors print("Compare blocks [{} : {}]".format(f_block, l_block)) for i in range(f_block, l_block + 1): request = bytes( json.dumps({ "jsonrpc": "2.0", "id": i, "method": "account_history_api.get_ops_in_block", "params": { "block_num": i, "only_virtual": False } }), "utf-8") + b"\r\n" with ThreadPoolExecutor(max_workers=2) as executor: #with ProcessPoolExecutor(max_workers=2) as executor: future1 = executor.submit(dpnd_call, url1, data=request, max_tries=max_tries, timeout=timeout) future2 = executor.submit(dpnd_call, url2, data=request, max_tries=max_tries, timeout=timeout) status1, json1 = future1.result() status2, json2 = future2.result() #status1, json1 = dpnd_call(url1, data=request, max_tries=max_tries, timeout=timeout) #status2, json2 = dpnd_call(url2, data=request, max_tries=max_tries, timeout=timeout) if status1 == False or status2 == False or json1 != json2: print("Difference @block: {}\n".format(i)) errors += 1 filename = wdir / Path(str(f_block) + "_" + str(l_block) + ".log") try: file = filename.open("w") except: print("Cannot open file:", filename) return file.write("Difference @block: {}\n".format(i)) file.write("{} response:\n".format(url1)) json.dump(json1, file, indent=2, sort_keys=True) file.write("\n") file.write("{} response:\n".format(url2)) json.dump(json2, file, indent=2, sort_keys=True) file.write("\n") file.close() print("Compare blocks [{} : {}] break with error".format( f_block, l_block)) return print("Compare blocks [{} : {}] finished".format(f_block, l_block))
def __init__(self, hass, config, async_add_entities, devices, users): self._state = None self._sub_state = None self._file_path = hass.data[DOMAIN]['port'] self._available = False self._f = None self._hass = hass self._config = config self._model = 'Unknown' self._lock = threading.BoundedSemaphore() self._stop = threading.Event() self._data_flowing = threading.Event() self._async_add_entities = async_add_entities self.devices = {dev.dev_id: dev for dev in devices} self.users = users self._is_updating = asyncio.Lock() self._activation_packet = b'' self._mode = '55' """ default binary strings for comparing states in d8 packets """ self._old_bin_string = '0'.zfill(32) self._new_bin_string = '0'.zfill(32) """Since MQTT is run on separate instance I will connect directly""" if hass.data[DOMAIN]['mqtt_external']: self._mqtt_enabled = True _LOGGER.info("(__init__) MQTT external: %s", self._mqtt_enabled) else: self._mqtt_enabled = hass.services.has_service('mqtt', 'publish') _LOGGER.info("(__init__) MQTT enabled? %s", self._mqtt_enabled) if self._mqtt_enabled: self._mqtt = hass.components.mqtt self._data_topic = hass.data[DOMAIN]['data_topic'] _LOGGER.info('DeviceScanner.__init__(): serial port: %s', format(self._file_path)) switcher = { "0": b'\x30', "1": b'\x31', "2": b'\x32', "3": b'\x33', "4": b'\x34', "5": b'\x35', "6": b'\x36', "7": b'\x37', "8": b'\x38', "9": b'\x39' } try: """ generate activation packet containing the alarm code, to trigger the right sensor packets """ packet_code = b'' for c in hass.data[DOMAIN]['code']: packet_code = packet_code + switcher.get(c) self._activation_packet = b'\x80\x08\x03\x39\x39\x39' + packet_code hass.bus.async_listen('homeassistant_stop', self.shutdown_threads) self._io_pool_exc = ThreadPoolExecutor(max_workers=5) self._read_loop_future = self._io_pool_exc.submit(self._read_loop) self._watcher_loop_keepalive_future = self._io_pool_exc.submit( self._watcher_loop_keepalive) self._watcher_loop_triggersensorupdate_future = self._io_pool_exc.submit( self._watcher_loop_triggersensorupdate) except Exception as ex: _LOGGER.error('Unexpected error 1: %s', format(ex))
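# A minimal sketch of the lifecycle pattern used above, with the three loops
# condensed into two and all names (SerialMonitor, _read_loop, _watcher_loop)
# hypothetical: long-lived worker loops are submitted to a small
# ThreadPoolExecutor and told to stop through a shared threading.Event when the
# application shuts down.
import threading
import time
from concurrent.futures import ThreadPoolExecutor


class SerialMonitor:
    def __init__(self):
        self._stop = threading.Event()
        self._pool = ThreadPoolExecutor(max_workers=2)
        self._read_future = self._pool.submit(self._read_loop)
        self._watch_future = self._pool.submit(self._watcher_loop)

    def _read_loop(self):
        while not self._stop.is_set():
            time.sleep(0.1)              # stands in for a blocking serial read

    def _watcher_loop(self):
        while not self._stop.wait(timeout=1.0):
            pass                         # stands in for a keepalive / sensor update

    def shutdown(self):
        self._stop.set()
        self._pool.shutdown(wait=True)


monitor = SerialMonitor()
monitor.shutdown()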
def get_existing_paths(self): """Discovers existing paths in a bucket. Faster alternative to using native google.cloud.storage.bucket.Bucket's list_blobs() method. Generates all combinations of files using FILE_PARAMETERS, and checks if the first file in that combination exists. If so, it is added to existing_paths set. Creating a set of the first files for each combinations rather than generating a list of all 1, 100, 1000, or 10000 files per combination (depending on the number of files in the combination) saves time and space. Returns: existing_paths: set containing paths that already exist in given bucket """ def _path_exists(path_details): """Adds a path to the path_set if it exists. Constructs a path based off of the parameters in the path_details tuple. Checks that the constructed path exists in the bucket defined in the outer function. If so, the path is added to path_set. Args: path_details (tuple): of (file_type, num_column, column_type, num_file, table_size) """ file_type, \ num_column, \ column_type, \ num_file, \ table_size = path_details for compression_type in compression_types[file_type]: if compression_type == 'none': extension = file_type else: extension = compression_extensions[compression_type] path = path_string.format( file_type, compression_type, num_column, column_type, num_file, table_size, extension, ) exists = storage.Blob( bucket=bucket, name=path, ).exists(gcs_client) if exists: path_set.add(path) logging.info('Discovering files from parameters list that exist' ' in bucket {0:s}.'.format(self.bucket_name)) file_types = self.file_params['fileType'] compression_types = self.file_params['fileCompressionTypes'] num_columns = self.file_params['numColumns'] column_types = self.file_params['columnTypes'] num_files = self.file_params['numFiles'] table_sizes = self.file_params['stagingDataSizes'] compression_extensions = ( file_constants.FILE_CONSTANTS['compressionExtensions']) path_set = set() path_string = ('fileType={0:s}/compression={1:s}/numColumns={2:d}/' 'columnTypes={3:s}/numFiles={4:d}/tableSize={5:s}/' 'file1.{6:s}') gcs_client = storage.Client(project=self.project_id) bucket = gcs_client.get_bucket(self.bucket_name) with ThreadPoolExecutor() as p: p.map( _path_exists, itertools.product( file_types, num_columns, column_types, num_files, table_sizes, )) logging.info('Done discovering {0:d} existing files.'.format( len(path_set))) return path_set
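# A minimal, GCS-free sketch of the technique above: executor.map drives a
# closure over every parameter combination from itertools.product, and the
# closure records combinations that pass an existence check into a shared set.
# os.path.exists is a hypothetical stand-in for the storage.Blob(...).exists()
# call, and the parameter lists are placeholders.
import itertools
import os.path
from concurrent.futures import ThreadPoolExecutor

file_types = ['csv', 'json', 'parquet']
num_columns = [10, 100]
table_sizes = ['10MB', '1GB']

existing = set()


def check_combination(details):
    file_type, columns, size = details
    path = 'fileType={0}/numColumns={1}/tableSize={2}/file1.{0}'.format(
        file_type, columns, size)
    if os.path.exists(path):     # CPython's set.add is effectively atomic here
        existing.add(path)


with ThreadPoolExecutor() as pool:
    pool.map(check_combination,
             itertools.product(file_types, num_columns, table_sizes))

print('{0:d} existing paths found'.format(len(existing)))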
def generate_quantities( self, data: Union[Dict, str] = None, mcmc_sample: Union[CmdStanMCMC, List[str]] = None, seed: int = None, gq_output_dir: str = None, ) -> CmdStanGQ: """ Run CmdStan's generate_quantities method which runs the generated quantities block of a model given an existing sample. This function takes a CmdStanMCMC object and the dataset used to generate that sample and calls to the CmdStan ``generate_quantities`` method to generate additional quantities of interest. The ``CmdStanGQ`` object records the command, the return code, and the paths to the generate method output csv and console files. The output files are written either to a specified output directory or to a temporary directory which is deleted upon session exit. Output filenames are composed of the model name, a timestamp in the form YYYYMMDDhhmm and the chain id, plus the corresponding filetype suffix, either '.csv' for the CmdStan output or '.txt' for the console messages, e.g. `bernoulli_ppc-201912081451-1.csv`. Output files written to the temporary directory contain an additional 8-character random string, e.g. `bernoulli_ppc-201912081451-1-5nm6as7u.csv`. :param data: Values for all data variables in the model, specified either as a dictionary with entries matching the data variables, or as the path of a data file in JSON or Rdump format. :param mcmc_sample: Can be either a CmdStanMCMC object returned by CmdStanPy's `sample` method or a list of stan-csv files generated by fitting the model to the data using any Stan interface. :param seed: The seed for random number generator. Must be an integer between ``0`` and ``2^32 - 1``. If unspecified, ``numpy.random.RandomState()`` is used to generate a seed which will be used for all chains. *NOTE: Specifying the seed will guarantee the same result for multiple invocations of this method with the same inputs. However this will not reproduce results from the sample method given the same inputs because the RNG will be in a different state.* :param gq_output_dir: Name of the directory in which the CmdStan output files are saved. If unspecified, files will be written to a temporary directory which is deleted upon session exit. 
:return: CmdStanGQ object """ sample_csv_files = [] sample_drawset = None chains = 0 if isinstance(mcmc_sample, CmdStanMCMC): sample_csv_files = mcmc_sample.runset.csv_files sample_drawset = mcmc_sample.get_drawset() chains = mcmc_sample.chains elif isinstance(mcmc_sample, list): sample_csv_files = mcmc_sample else: raise ValueError( 'mcmc_sample must be either CmdStanMCMC object' ' or list of paths to sample csv_files' ) try: chains = len(sample_csv_files) if sample_drawset is None: # assemble sample from csv files sampler_args = SamplerArgs() args = CmdStanArgs( self._name, self._exe_file, chain_ids=[x + 1 for x in range(chains)], method_args=sampler_args, ) runset = RunSet(args=args, chains=chains) runset._csv_files = sample_csv_files sample_fit = CmdStanMCMC(runset) sample_fit._validate_csv_files() sample_drawset = sample_fit.get_drawset() except ValueError as e: raise ValueError( 'Invalid mcmc_sample, error:\n\t{}\n\t' ' while processing files\n\t{}'.format( repr(e), '\n\t'.join(sample_csv_files) ) ) generate_quantities_args = GenerateQuantitiesArgs( csv_files=sample_csv_files ) generate_quantities_args.validate(chains) with MaybeDictToFilePath(data, None) as (_data, _inits): args = CmdStanArgs( self._name, self._exe_file, chain_ids=[x + 1 for x in range(chains)], data=_data, seed=seed, output_dir=gq_output_dir, method_args=generate_quantities_args, ) runset = RunSet(args=args, chains=chains) cores_avail = cpu_count() cores = max(min(cores_avail - 2, chains), 1) with ThreadPoolExecutor(max_workers=cores) as executor: for i in range(chains): executor.submit(self._run_cmdstan, runset, i) if not runset._check_retcodes(): msg = 'Error during generate_quantities' for i in range(chains): if runset._retcode(i) != 0: msg = '{}, chain {} returned error code {}'.format( msg, i, runset._retcode(i) ) raise RuntimeError(msg) quantities = CmdStanGQ(runset=runset, mcmc_sample=sample_drawset) quantities._set_attrs_gq_csv_files(sample_csv_files[0]) return quantities
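# A minimal sketch of the per-chain scheduling used above: one worker per chain
# is submitted to a ThreadPoolExecutor sized from the available CPUs, and the
# return codes are only inspected after the pool has drained. run_chain and its
# placeholder subprocess are hypothetical stand-ins for _run_cmdstan.
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import cpu_count

chains = 4
retcodes = [None] * chains


def run_chain(chain_id):
    proc = subprocess.run([sys.executable, '-c', 'pass'])  # placeholder command
    retcodes[chain_id] = proc.returncode


cores = max(min(cpu_count() - 2, chains), 1)
with ThreadPoolExecutor(max_workers=cores) as executor:
    for i in range(chains):
        executor.submit(run_chain, i)

failed = [i for i, code in enumerate(retcodes) if code != 0]
if failed:
    raise RuntimeError('chains {} returned a non-zero exit code'.format(failed))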
"beauty", "hair", "apple", "macbook", "calcukator", "pen", "glass", "note 8", "samsung", "wallet", "watch" ] products = [] threads = [] THREADING_LIMIT = 444 executor = ThreadPoolExecutor(max_workers=THREADING_LIMIT) started_threads = queue.Queue(maxsize=1000000) not_started_threads = queue.Queue(maxsize=1000000) elastic_search = None class ScrapingThread(threading.Thread): def __init__(self, asin, search_txt, type, url, strt, endd): threading.Thread.__init__(self) self.asin = asin self.search_text = search_txt self.type = type self.starting = strt self.ending = endd self.url = url
import ntpath
import time
import os
from tensorflow.keras import backend as K
import argparse
from pathlib import Path
from multiprocessing import Process
from time import time  # note: rebinds the name `time` (imported above) to the function

try:
    from armv7l.openvino.inference_engine import IENetwork, IECore
except ImportError:
    from openvino.inference_engine import IENetwork, IECore

from skimage.transform import resize
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=16)

parser = argparse.ArgumentParser()
parser.add_argument('-ip', '--input_path',
                    default='D:/00_NCSU/00_Resources/00_Datasets/oak_NC_MD_grassclover/val/',
                    type=str, help="Input Path")
parser.add_argument("--xml_path",
                    default="/home/pi/OAK-D-depthai-expts/02-NCS2-mode/FP16/3class_360/3class_360.xml",
                    help="Path of the deeplabv3plus openvino model.")
parser.add_argument('-pb_path', '--tf_pb_path',
                    default="D:/00_NCSU/Fall2020/ECE633_IndividualTopics/OAK-D-Weed-Cam/Model/deeplabv3+/models/3_class_model_mobilenet_v3_small_v2.1/3_class_model_mobilenet_v3_small_v2.1_1080x1920.pb",
                    type=str, help='Model Path for tensorflow file')
parser.add_argument('-ms', '--model_size', default=(1080, 1920), type=int,
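# A hypothetical sketch of how a module-level executor like the one above is
# typically used in an inference loop: the next frame is loaded and resized on
# a worker thread while the current frame is being processed, so I/O overlaps
# with compute. load_and_resize and run_inference are made-up placeholders, not
# functions from the script above.
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=2)


def load_and_resize(path):
    # placeholder for reading an image and resizing it to the model input shape
    return path


def run_inference(frame):
    # placeholder for the OpenVINO / TensorFlow forward pass
    return frame


paths = ['img_000.png', 'img_001.png', 'img_002.png']
pending = executor.submit(load_and_resize, paths[0])
for nxt in paths[1:] + [None]:
    frame = pending.result()                             # wait for the prefetch
    if nxt is not None:
        pending = executor.submit(load_and_resize, nxt)  # prefetch the next frame
    run_inference(frame)
executor.shutdown(wait=True)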
def sample( self, data: Union[Dict, str] = None, chains: Union[int, None] = None, cores: Union[int, None] = None, seed: Union[int, List[int]] = None, chain_ids: Union[int, List[int]] = None, inits: Union[Dict, float, str, List[str]] = None, warmup_iters: int = None, sampling_iters: int = None, save_warmup: bool = False, thin: int = None, max_treedepth: float = None, metric: Union[str, List[str]] = None, step_size: Union[float, List[float]] = None, adapt_engaged: bool = True, adapt_delta: float = None, fixed_param: bool = False, output_dir: str = None, save_diagnostics: bool = False, show_progress: Union[bool, str] = False ) -> CmdStanMCMC: """ Run or more chains of the NUTS sampler to produce a set of draws from the posterior distribution of a model conditioned on some data. This function validates the specified configuration, composes a call to the CmdStan ``sample`` method and spawns one subprocess per chain to run the sampler and waits for all chains to run to completion. Unspecified arguments are not included in the call to CmdStan, i.e., those arguments will have CmdStan default values. For each chain, the ``CmdStanMCMC`` object records the command, the return code, the sampler output file paths, and the corresponding console outputs, if any. The output files are written either to a specified output directory or to a temporary directory which is deleted upon session exit. The output filenames are composed of the model name, a timestamp in the form YYYYMMDDhhmm and the chain id, plus the corresponding filetype suffix, either '.csv' for the CmdStan output or '.txt' for the console messages, e.g. `bernoulli-201912081451-1.csv`. Output files written to the temporary directory contain an additional 8-character random string, e.g. `bernoulli-201912081451-1-5nm6as7u.csv`. :param data: Values for all data variables in the model, specified either as a dictionary with entries matching the data variables, or as the path of a data file in JSON or Rdump format. :param chains: Number of sampler chains, should be > 1. :param cores: Number of processes to run in parallel. Must be an integer between 1 and the number of CPUs in the system. If none then set automatically to `chains` but no more than `total_cpu_count - 2` :param seed: The seed for random number generator. Must be an integer between ``0`` and ``2^32 - 1``. If unspecified, ``numpy.random.RandomState()`` is used to generate a seed which will be used for all chains. When the same seed is used across all chains, the chain-id is used to advance the RNG to avoid dependent samples. :param chain_ids: The offset for the random number generator, either an integer or a list of unique per-chain offsets. If unspecified, chain ids are numbered sequentially starting from 1. :param inits: Specifies how the sampler initializes parameter values. Initialization is either uniform random on a range centered on 0, exactly 0, or a dictionary or file of initial values for some or all parameters in the model. The default initialization behavior will initialize all parameter values on range [-2, 2] on the _unconstrained_ support. If the expected parameter values are too far from this range, this option may improve adaptation. The following value types are allowed: * Single number ``n > 0`` - initialization range is [-n, n]. * ``0`` - all parameters are initialized to 0. * dictionary - pairs parameter name : initial value. * string - pathname to a JSON or Rdump data file. * list of strings - per-chain pathname to data file. 
:param warmup_iters: Number of warmup iterations for each chain. :param sampling_iters: Number of draws from the posterior for each chain. :param save_warmup: When True, sampler saves warmup draws as part of the Stan csv output file. :param thin: Period between saved samples. :param max_treedepth: Maximum depth of trees evaluated by NUTS sampler per iteration. :param metric: Specification of the mass matrix, either as a vector consisting of the diagonal elements of the covariance matrix (``diag`` or ``diag_e``) or the full covariance matrix (``dense`` or ``dense_e``). If the value of the metric argument is a string other than ``diag``, ``diag_e``, ``dense``, or ``dense_e``, it must be a valid filepath to a JSON or Rdump file which contains an entry ``inv_metric`` whose value is either the diagonal vector or the full covariance matrix. If the value of the metric argument is a list of paths, its length must match the number of chains and all paths must be unique. :param step_size: Initial stepsize for HMC sampler. The value is either a single number or a list of numbers which will be used as the global or per-chain initial step_size, respectively. The length of the list of step sizes must match the number of chains. :param adapt_engaged: When True, adapt stepsize and metric. *Note: If True, ``warmup_iters`` must be > 0.* :param adapt_delta: Adaptation target Metropolis acceptance rate. The default value is 0.8. Increasing this value, which must be strictly less than 1, causes adaptation to use smaller step sizes. It improves the effective sample size, but may increase the time per iteration. :param fixed_param: When True, call CmdStan with argument "algorithm=fixed_param" which runs the sampler without updating the Markov Chain, thus the values of all parameters and transformed parameters are constant across all draws and only those values in the generated quantities block that are produced by RNG functions may change. This provides a way to use Stan programs to generate simulated data via the generated quantities block. This option must be used when the parameters block is empty. Default value is False. :param output_dir: Name of the directory to with the CmdStan output files are written. If unspecified, output files will be written to a temporary directory which is deleted upon session exit. :param save_diagnostics: Whether or not to save diagnostics. If True, csv output files are written to ``<basename>-diagnostic-<chain_id>.csv.``, where ``<basename>`` is set with ``csv_basename``. :param show_progress: Use tqdm progress bar to show sampling progress. If show_progress=='notebook' use tqdm_notebook (needs nodejs for jupyter). 
:return: CmdStanMCMC object """ if chains is None: if fixed_param: chains = 1 else: chains = 4 if chains < 1: raise ValueError( 'chains must be a positive integer value, found {}'.format( chains ) ) if chain_ids is None: chain_ids = [x + 1 for x in range(chains)] else: if isinstance(chain_ids, int): if chain_ids < 1: raise ValueError( 'chain_id must be a positive integer value,' ' found {}'.format(chain_ids) ) offset = chain_ids chain_ids = [x + offset + 1 for x in range(chains)] else: if not len(chain_ids) == chains: raise ValueError( 'chain_ids must correspond to number of chains' ' specified {} chains, found {} chain_ids'.format( chains, len(chain_ids) ) ) for i in len(chain_ids): if chain_ids[i] < 1: raise ValueError( 'chain_id must be a positive integer value,' ' found {}'.format(chain_ids[i]) ) cores_avail = cpu_count() if cores is None: cores = max(min(cores_avail - 2, chains), 1) if cores < 1: raise ValueError( 'cores must be a positive integer value, found {}'.format(cores) ) if cores > cores_avail: self._logger.warning( 'requested %u cores, only %u available', cores, cpu_count() ) cores = cores_avail refresh = None if show_progress: try: import tqdm self._logger.propagate = False except ImportError: self._logger.warning( ( 'tqdm not installed, progress information is not ' 'shown. Please install tqdm with ' "'pip install tqdm'" ) ) show_progress = False # TODO: issue 49: inits can be initialization function sampler_args = SamplerArgs( warmup_iters=warmup_iters, sampling_iters=sampling_iters, save_warmup=save_warmup, thin=thin, max_treedepth=max_treedepth, metric=metric, step_size=step_size, adapt_engaged=adapt_engaged, adapt_delta=adapt_delta, fixed_param=fixed_param, ) with MaybeDictToFilePath(data, inits) as (_data, _inits): args = CmdStanArgs( self._name, self._exe_file, chain_ids=chain_ids, data=_data, seed=seed, inits=_inits, output_dir=output_dir, save_diagnostics=save_diagnostics, method_args=sampler_args, refresh=refresh, ) runset = RunSet(args=args, chains=chains) pbar = None all_pbars = [] with ThreadPoolExecutor(max_workers=cores) as executor: for i in range(chains): if show_progress: if ( isinstance(show_progress, str) and show_progress.lower() == 'notebook' ): try: tqdm_pbar = tqdm.tqdm_notebook except ImportError: msg = ( 'Cannot import tqdm.tqdm_notebook.\n' 'Functionality is only supported on the ' 'Jupyter Notebook and compatible platforms' '.\nPlease follow the instructions in ' 'https://github.com/tqdm/tqdm/issues/394#' 'issuecomment-384743637 and remember to ' 'stop & start your jupyter server.' ) self._logger.warning(msg) tqdm_pbar = tqdm.tqdm else: tqdm_pbar = tqdm.tqdm # enable dynamic_ncols for advanced users # currently hidden feature dynamic_ncols = os.environ.get( 'TQDM_DYNAMIC_NCOLS', 'False' ) if dynamic_ncols.lower() in ['0', 'false']: dynamic_ncols = False else: dynamic_ncols = True pbar = tqdm_pbar( desc='Chain {} - warmup'.format(i + 1), position=i, total=1, # Will set total from Stan's output dynamic_ncols=dynamic_ncols, ) all_pbars.append(pbar) executor.submit(self._run_cmdstan, runset, i, pbar) # Closing all progress bars for pbar in all_pbars: pbar.close() if show_progress: # re-enable logger for console self._logger.propagate = True if not runset._check_retcodes(): msg = 'Error during sampling' for i in range(chains): if runset._retcode(i) != 0: msg = '{}, chain {} returned error code {}'.format( msg, i, runset._retcode(i) ) raise RuntimeError(msg) mcmc = CmdStanMCMC(runset, fixed_param) mcmc._validate_csv_files() return mcmc
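# A minimal sketch of the per-chain progress-bar wiring used above: each worker
# gets its own tqdm bar (kept on its own terminal line by position=i) and
# updates it from its thread, while the main thread only closes the bars after
# the pool drains. The fake workload replaces the real CmdStan subprocess.
import time
from concurrent.futures import ThreadPoolExecutor

import tqdm  # assumed installed; the sampler above falls back to no bar otherwise

chains, iters = 4, 50


def run_chain(chain_id, pbar):
    for _ in range(iters):
        time.sleep(0.01)          # stands in for consuming sampler output
        pbar.update(1)


bars = [tqdm.tqdm(desc='Chain {} - warmup'.format(i + 1), position=i, total=iters)
        for i in range(chains)]
with ThreadPoolExecutor(max_workers=chains) as executor:
    for i in range(chains):
        executor.submit(run_chain, i, bars[i])
for bar in bars:
    bar.close()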
def filter_failures(failureThreshold, folder, threads):
    executor = ThreadPoolExecutor(max_workers=threads)
    for root, _dirs, mutations in os.walk(folder):
        for mutation in mutations:
            executor.submit(filter, root, mutation, failureThreshold)
    executor.shutdown(wait=True)
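# A minimal sketch of the walk-and-submit pattern above: one task per file is
# handed to the pool and shutdown(wait=True) blocks until every task has run.
# Keeping the futures additionally lets the caller surface worker exceptions
# and aggregate results. count_lines is a hypothetical stand-in for the
# per-mutation filter.
import os
from concurrent.futures import ThreadPoolExecutor


def count_lines(root, name):
    with open(os.path.join(root, name), errors='ignore') as handle:
        return sum(1 for _ in handle)


def process_tree(folder, threads=4):
    executor = ThreadPoolExecutor(max_workers=threads)
    futures = []
    for root, _dirs, files in os.walk(folder):
        for name in files:
            futures.append(executor.submit(count_lines, root, name))
    executor.shutdown(wait=True)
    return sum(f.result() for f in futures)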
class Discover(Task): """Custom Celery Task class. http://docs.celeryproject.org/en/latest/userguide/tasks.html#custom-task-classes """ name = 'Discover' task_id = None # If a simhash calculation for a URL & year does more than # `max_download_errors`, stop it to avoid pointless requests. The captures # are not text/html or there is a problem with the WBM. max_download_errors = 10 max_capture_download = 1000000 def __init__(self, cfg): self.simhash_size = cfg['simhash']['size'] self.simhash_expire = cfg['simhash']['expire_after'] if self.simhash_size > 512: raise Exception('do not support simhash longer than 512') headers = { 'User-Agent': 'wayback-discover-diff', 'Accept-Encoding': 'gzip,deflate', 'Connection': 'keep-alive' } cdx_auth_token = cfg.get('cdx_auth_token') if cdx_auth_token: headers['cookie'] = 'cdx_auth_token=%s' % cdx_auth_token self.http = urllib3.HTTPConnectionPool('web.archive.org', maxsize=50, retries=4, headers=headers) self.redis = StrictRedis(connection_pool=BlockingConnectionPool. from_url(cfg['redis_uri'], max_connections=50, timeout=cfg.get('redis_timeout', 10), decode_responses=True)) self.tpool = ThreadPoolExecutor(max_workers=cfg['threads']) self.snapshots_number = cfg['snapshots']['number_per_year'] self.download_errors = 0 # Initialize logger self._log = logging.getLogger('wayback_discover_diff.worker') def download_capture(self, ts): """Download capture data from the WBM and update job status. Return data only when its text or html. On download error, increment download_errors which will stop the task after 10 errors. Fetch data up to a limit to avoid getting too much (which is unnecessary) and have a consistent operation time. """ try: statsd_incr('download-capture') self._log.info('fetching capture %s %s', ts, self.url) res = self.http.request('GET', '/web/%sid_/%s' % (ts, self.url), preload_content=False) data = res.read(self.max_capture_download) ctype = res.headers.get('content-type') res.release_conn() if ctype: ctype = ctype.lower() if "text" in ctype or "html" in ctype: return data except HTTPError as exc: self.download_errors += 1 self._log.error('cannot fetch capture %s %s', ts, self.url, exc_info=1) return None def start_profiling(self, snapshot, index): """Used for performance testing only. """ cProfile.runctx('self.get_calc(snapshot, index)', globals=globals(), locals=locals(), filename='profile.prof') def get_calc(self, capture): """if a capture with an equal digest has been already processed, return cached simhash and avoid redownloading and processing. Else, download capture, extract HTML features and calculate simhash. If there are already too many download failures, return None without any processing to avoid pointless requests. Return None if any problem occurs (e.g. HTTP error or cannot calculate) """ (timestamp, digest) = capture.split(' ') simhash_enc = self.seen.get(digest) if simhash_enc: self._log.info("already seen %s", digest) return (timestamp, simhash_enc) if self.download_errors >= self.max_download_errors: self._log.error( '%d consecutive download errors fetching %s captures', self.download_errors, self.url) return None response_data = self.download_capture(timestamp) if response_data: data = extract_html_features(response_data) if data: statsd_incr('calculate-simhash') self._log.info("calculating simhash") simhash = calculate_simhash(data, self.simhash_size, hashfunc=custom_hash_function) # This encoding is necessary to store simhash data in Redis. 
simhash_enc = base64.b64encode( pack_simhash_to_bytes(simhash, self.simhash_size)) self.seen[digest] = simhash_enc return (timestamp, simhash_enc) return None def run(self, url, year, created): """Run Celery Task. """ self.job_id = self.request.id self.url = url_fix(url) time_started = datetime.now() self._log.info('Start calculating simhashes.') self.download_errors = 0 if not self.url: self._log.error('did not give url parameter') return {'status': 'error', 'info': 'URL is required.'} if not year: self._log.error('did not give year parameter') return {'status': 'error', 'info': 'Year is required.'} # fetch captures self.update_state( state='PENDING', meta={'info': 'Fetching %s captures for year %s' % (url, year)}) resp = self.fetch_cdx(url, year) if resp.get('status') == 'error': return resp captures = resp.get('captures') total = len(captures) self.seen = dict() # calculate simhashes in parallel i = 0 final_results = {} for res in self.tpool.map(self.get_calc, captures): if not res: continue (timestamp, simhash) = res if simhash: final_results[timestamp] = simhash if i % 10 == 0: self.update_state(state='PENDING', meta={ 'info': 'Processed %d out of %d captures.' % (i, total) }) i += 1 self._log.info('%d final results for %s and year %s.', len(final_results), self.url, year) if final_results: try: urlkey = surt(self.url) self.redis.hmset(urlkey, final_results) self.redis.expire(urlkey, self.simhash_expire) except RedisError as exc: self._log.error('cannot write simhashes to Redis for URL %s', self.url, exc_info=1) duration = (datetime.now() - time_started).seconds self._log.info('Simhash calculation finished in %.2fsec.', duration) return {'duration': str(duration)} def fetch_cdx(self, url, year): """Make a CDX query for timestamp and digest for a specific year. """ try: self._log.info('fetching CDX of %s for year %s', url, year) # Collapse captures by timestamp to get 3 captures per day (max). # TODO increase that in the future when we can handle more captures. # Its necessary to reduce the huge number of captures some websites # (e.g. twitter.com has 167k captures for 2018. Get only 2xx captures. fields = { 'url': url, 'from': year, 'to': year, 'statuscode': 200, 'fl': 'timestamp,digest', 'collapse': 'timestamp:9' } if self.snapshots_number != -1: fields['limit'] = self.snapshots_number response = self.http.request('GET', '/web/timemap', fields=fields) self._log.info('finished fetching timestamps of %s for year %s', self, year) if response.status == 200: if not response.data: self._log.info('no captures found for %s %s', self, year) urlkey = surt(url) self.redis.hset(urlkey, year, -1) self.redis.expire(urlkey, self.simhash_expire) return { 'status': 'error', 'info': 'No captures of %s for year %s' % (url, year) } captures_txt = response.data.decode('utf-8') captures = captures_txt.strip().split("\n") if captures: return {'status': 'success', 'captures': captures} return { 'status': 'error', 'info': 'No captures of %s for year %s' % (url, year) } except (ValueError, HTTPError) as exc: self._log.error('invalid CDX query response for %s %s', url, year, exc_info=1) return {'status': 'error', 'info': str(exc)} except RedisError as exc: self._log.error('error connecting with Redis for url %s year %s', url, year, exc_info=1) return {'status': 'error', 'info': str(exc)}
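# A minimal sketch of the map-with-progress pattern used in run() above:
# ThreadPoolExecutor.map preserves input order, results arrive as an iterator,
# and a counter drives periodic status updates. fetch_length and the URL list
# are hypothetical stand-ins for get_calc() and the capture list.
from concurrent.futures import ThreadPoolExecutor
from urllib.request import urlopen


def fetch_length(url):
    try:
        with urlopen(url, timeout=10) as resp:
            return url, len(resp.read())
    except OSError:
        return url, None        # mirrors get_calc() returning None on failure


urls = ['https://example.org/'] * 25
results = {}
with ThreadPoolExecutor(max_workers=5) as pool:
    for i, (url, size) in enumerate(pool.map(fetch_length, urls)):
        if size is not None:
            results[url] = size
        if i % 10 == 0:
            print('Processed {} out of {} captures.'.format(i, len(urls)))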
def run_io_tasks_in_parallel(tasks):
    with ThreadPoolExecutor() as executor:
        running_tasks = [executor.submit(task) for task in tasks]
        for running_task in running_tasks:
            running_task.result()
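# A hypothetical usage sketch for run_io_tasks_in_parallel() above: each
# callable runs on its own worker thread, and .result() re-raises any exception
# from a worker in the calling thread, so failures are not silently dropped.
import time

run_io_tasks_in_parallel([
    lambda: time.sleep(0.5),                            # I/O-bound placeholder
    lambda: print('runs concurrently with the sleep'),
])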
class LiveChat: ''' LiveChat object fetches chat data and stores them in a buffer with ThreadpoolExecutor. Parameter --------- video_id : str seektime : int start position of fetching chat (seconds). This option is valid for archived chat only. If negative value, chat data posted before the start of the broadcast will be retrieved as well. processor : ChatProcessor buffer : Buffer buffer of chat data fetched background. interruptable : bool Allows keyboard interrupts. Set this parameter to False if your own threading program causes the problem. callback : func function called periodically from _listen(). done_callback : func function called when listener ends. direct_mode : bool If True, invoke specified callback function without using buffer. callback is required. If not, IllegalFunctionCall will be raised. force_replay : bool force to fetch archived chat data, even if specified video is live. topchat_only : bool If True, get only top chat. Attributes --------- _executor : ThreadPoolExecutor This is used for _listen() loop. _is_alive : bool Flag to stop getting chat. ''' _setup_finished = False def __init__(self, video_id, seektime=-1, processor=DefaultProcessor(), buffer=None, interruptable=True, callback=None, done_callback=None, direct_mode=False, force_replay=False, topchat_only=False, logger=config.logger(__name__)): self._video_id = extract_video_id(video_id) self.seektime = seektime if isinstance(processor, tuple): self.processor = Combinator(processor) else: self.processor = processor self._buffer = buffer self._callback = callback self._done_callback = done_callback self._executor = ThreadPoolExecutor(max_workers=2) self._direct_mode = direct_mode self._is_alive = True self._is_replay = force_replay self._parser = Parser(is_replay=self._is_replay) self._pauser = Queue() self._pauser.put_nowait(None) self._first_fetch = True self._fetch_url = "live_chat/get_live_chat?continuation=" self._topchat_only = topchat_only self._event = Event() self._logger = logger self.exception = None if interruptable: signal.signal(signal.SIGINT, lambda a, b: self.terminate()) self._setup() def _setup(self): # An exception is raised when direct mode is true and no callback is set. if self._direct_mode: if self._callback is None: raise exceptions.IllegalFunctionCall( "When direct_mode=True, callback parameter is required.") else: # Create a default buffer if `direct_mode` is False and buffer is not set. if self._buffer is None: self._buffer = Buffer(maxsize=20) # Create a loop task to call callback if the `callback` param is specified. if self._callback is None: pass else: # Start a loop task calling callback function. self._executor.submit(self._callback_loop, self._callback) # Start a loop task for _listen() self.listen_task = self._executor.submit(self._startlisten) # Register add_done_callback if self._done_callback is None: self.listen_task.add_done_callback(self._finish) else: self.listen_task.add_done_callback(self._done_callback) def _startlisten(self): time.sleep(0.1) # sleep shortly to prohibit skipping fetching data """Fetch first continuation parameter, create and start _listen loop. """ initial_continuation = liveparam.getparam(self._video_id, 3) self._listen(initial_continuation) def _listen(self, continuation): ''' Fetch chat data and store them into buffer, get next continuaiton parameter and loop. 
Parameter --------- continuation : str parameter for next chat data ''' try: with httpx.Client(http2=True) as client: while (continuation and self._is_alive): continuation = self._check_pause(continuation) contents = self._get_contents(continuation, client, headers) metadata, chatdata = self._parser.parse(contents) timeout = metadata['timeoutMs'] / 1000 chat_component = { "video_id": self._video_id, "timeout": timeout, "chatdata": chatdata } time_mark = time.time() if self._direct_mode: processed_chat = self.processor.process( [chat_component]) if isinstance(processed_chat, tuple): self._callback(*processed_chat) else: self._callback(processed_chat) else: self._buffer.put(chat_component) diff_time = timeout - (time.time() - time_mark) self._event.wait(diff_time if diff_time > 0 else 0) continuation = metadata.get('continuation') except exceptions.ChatParseException as e: self._logger.debug(f"[{self._video_id}]{str(e)}") raise except (TypeError, json.JSONDecodeError): self._logger.error(f"{traceback.format_exc(limit=-1)}") raise self._logger.debug(f"[{self._video_id}] finished fetching chat.") raise exceptions.ChatDataFinished def _check_pause(self, continuation): if self._pauser.empty(): '''pause''' self._pauser.get() '''resume: prohibit from blocking by putting None into _pauser. ''' self._pauser.put_nowait(None) if not self._is_replay: continuation = liveparam.getparam(self._video_id, 3) return continuation def _get_contents(self, continuation, client, headers): '''Get 'continuationContents' from livechat json. If contents is None at first fetching, try to fetch archive chat data. Return: ------- 'continuationContents' which includes metadata & chat data. ''' livechat_json = (self._get_livechat_json(continuation, client, headers)) contents = self._parser.get_contents(livechat_json) if self._first_fetch: if contents is None or self._is_replay: '''Try to fetch archive chat data.''' self._parser.is_replay = True self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation=" continuation = arcparam.getparam(self._video_id, self.seektime, self._topchat_only) livechat_json = (self._get_livechat_json( continuation, client, headers)) reload_continuation = self._parser.reload_continuation( self._parser.get_contents(livechat_json)) if reload_continuation: livechat_json = (self._get_livechat_json( reload_continuation, client, headers)) contents = self._parser.get_contents(livechat_json) self._is_replay = True self._first_fetch = False return contents def _get_livechat_json(self, continuation, client, headers): ''' Get json which includes chat data. ''' continuation = urllib.parse.quote(continuation) livechat_json = None url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" for _ in range(MAX_RETRY + 1): with client: try: livechat_json = client.get(url, headers=headers).json() break except (json.JSONDecodeError, httpx.HTTPError): time.sleep(2) continue else: self._logger.error(f"[{self._video_id}]" f"Exceeded retry count.") raise exceptions.RetryExceedMaxCount() return livechat_json def _callback_loop(self, callback): """ If a callback is specified in the constructor, it throws chat data at regular intervals to the function specified in the callback in the backgroun Parameter --------- callback : func function to which the processed chat data is passed. 
""" while self.is_alive(): items = self._buffer.get() processed_chat = self.processor.process(items) if isinstance(processed_chat, tuple): self._callback(*processed_chat) else: self._callback(processed_chat) def get(self): """ Retrieves data from the buffer, throws it to the processor, and returns the processed chat data. Returns : Chat data processed by the Processor """ if self._callback is None: if self.is_alive(): items = self._buffer.get() return self.processor.process(items) else: return [] raise exceptions.IllegalFunctionCall( "Callback parameter is already set, so get() cannot be performed.") def is_replay(self): return self._is_replay def pause(self): if self._callback is None: return if not self._pauser.empty(): self._pauser.get() def resume(self): if self._callback is None: return if self._pauser.empty(): self._pauser.put_nowait(None) def is_alive(self): return self._is_alive def _finish(self, sender): '''Called when the _listen() task finished.''' try: self._task_finished() except CancelledError: self._logger.debug(f'[{self._video_id}] cancelled:{sender}') def terminate(self): ''' Terminate fetching chats. ''' if self._pauser.empty(): self._pauser.put_nowait(None) self._is_alive = False self._buffer.put({}) self._event.set() self.processor.finalize() def _task_finished(self): if self.is_alive(): self.terminate() try: self.listen_task.result() except Exception as e: self.exception = e if not isinstance(e, exceptions.ChatParseException): self._logger.error(f'Internal exception - {type(e)}{str(e)}') self._logger.info(f'[{self._video_id}] finished.') def raise_for_status(self): if self.exception is not None: raise self.exception