class TestQueue(object):
    def __init__(self, test_source_cls, test_type, tests, **kwargs):
        self.queue = None
        self.test_source_cls = test_source_cls
        self.test_type = test_type
        self.tests = tests
        self.kwargs = kwargs

    def __enter__(self):
        if not self.tests[self.test_type]:
            return None

        self.queue = Queue()
        has_tests = self.test_source_cls.queue_tests(self.queue,
                                                     self.test_type,
                                                     self.tests,
                                                     **self.kwargs)
        # There is a race condition that means sometimes we continue
        # before the tests have been written to the underlying pipe.
        # Polling the pipe for data here avoids that
        self.queue._reader.poll(10)
        assert not self.queue.empty()
        return self.queue

    def __exit__(self, *args, **kwargs):
        if self.queue is not None:
            self.queue.close()
            self.queue = None
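# A minimal usage sketch for TestQueue (not part of the original code).
# FakeTestSource and the tests dict below are illustrative stand-ins for the
# real test-source class; it assumes `from multiprocessing import Queue` as in
# the snippet above.
class FakeTestSource(object):
    @classmethod
    def queue_tests(cls, queue, test_type, tests, **kwargs):
        for test in tests[test_type]:
            queue.put(test)
        return bool(tests[test_type])

tests = {"testharness": ["/example/a.html", "/example/b.html"]}
with TestQueue(FakeTestSource, "testharness", tests) as queue:
    while queue is not None and not queue.empty():
        print(queue.get())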
class Channel(object):
    def __init__(self):
        self._in = Queue()
        self._out = Queue()

    def incoroutine(self):
        return coroutine.self() is self

    def get(self):
        q = self._in if self.incoroutine() else self._out
        return q.get()

    def put(self, *args):
        q = self._out if self.incoroutine() else self._in
        return q.put(*args)

    def fileno(self):
        q = self._in if self.incoroutine() else self._out
        return q._reader.fileno()

    def close(self):
        self._in.close()
        self._out.close()

    def alive(self):
        # any() behaves the same on Python 2 and 3; bool(filter(...)) is
        # always truthy on Python 3 because filter returns a lazy iterator.
        return any(alive(r) for r in [self._in._reader, self._out._reader])
class FakeSocket(object):
    def __init__(self):
        self.sent = Queue(100)
        self.received = Queue(100)

    def get(self):
        """Gets a message that was sent by this socket.
        This method returns what the server would have received."""
        return self.sent.get()

    def put(self, msg):
        """Enqueues a message for the client to receive.
        This method simulates receiving data over a socket."""
        self.received.put(msg)

    def send(self, data):
        """Socket interface for sending data to a client.
        This data is retrievable through .get()"""
        self.sent.put(data)

    def recv(self, length=0):
        """Socket interface for receiving data from a server.
        This data is seedable through .put()"""
        return self.received.get()

    def close(self):
        self.sent.close()
        self.received.close()
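# Hedged usage sketch for FakeSocket (not from the original code): test code
# seeds inbound data with put() and inspects outbound data with get(), while
# the code under test only sees the usual send()/recv() socket interface.
sock = FakeSocket()
sock.put(b"hello from the fake server")        # seed data for recv()
assert sock.recv() == b"hello from the fake server"
sock.send(b"hello from the client")            # code under test sends...
assert sock.get() == b"hello from the client"  # ...which the test can inspect
sock.close()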
def disc_inserted_queue():
    q = Queue()
    p = Process(target=process, args=(q,))
    p.start()
    yield q
    q.close()
    p.terminate()
class _workerQpushTimer():
    def __init__(self):
        self.syncPeriod = 2
        self.timer = None
        self.Qinit()

    def Qinit(self):
        self.syncTmpQ = Queue()

    # flush remaining items in the queue, then close and join_thread
    def Qflush(self):
        while True:
            try:
                self.syncTmpQ.get(True, comm.FLUSH_TIMEOUT)
            except Empty:
                break
        self.syncTmpQ.close()
        self.syncTmpQ.join_thread()

    def enableTimer(self, workerPool):
        self.timer = Timer(self.syncPeriod, self.pushToWorkerQ, [workerPool])
        self.timer.start()

    def disableTimer(self):
        if self.timer is not None:
            self.timer.cancel()

    # executed periodically to sync items from the main process queue
    # into each worker's queue
    def pushToWorkerQ(self, workerPool):
        while not comm.done.value:
            try:
                item = self.syncTmpQ.get_nowait()
                for w in workerPool:
                    w.queue.put_nowait(item)
            except Empty:
                break
        if not comm.done.value:
            self.enableTimer(workerPool)
class wrapped_dispatcher(object): def __init__(self, enqueued=None, on_load=None): self.queue = Queue() kwargs = { 'queue': self.queue } if enqueued: kwargs['enqueued_tasks'] = enqueued if on_load: kwargs['on_daemon_load'] = on_load self.dispatcher = WrappedDispatcher(**kwargs) self.context = None self.sockets = {} def __enter__(self): self.dispatcher.start() self.context = Context() self.sockets['in'] = self.context.socket(PUSH) self.sockets['out'] = self.context.socket(PULL) self.sockets['in'].connect(settings.ZTASKD_URL) self.sockets['out'].connect(settings.ZTASK_WORKER_URL) return (self.queue, self.sockets['in'], self.sockets['out']) def __exit__(self, exc_type, exc_value, traceback): self.dispatcher.terminate() self.context.destroy() self.queue.close()
class CommunicationChannels(object):
    '''Bidirectional communication channel'''

    def __init__(self):
        self.qin = Queue()
        self.qout = Queue()

    def set_child(self):
        # swap the queues so that the child's "in" is the parent's "out"
        q = self.qin
        self.qin = self.qout
        self.qout = q

    def close(self):
        self.qin.close()
        self.qout.close()

    def dump(self, obj):
        self.qout.put(obj, block=True)
        confirm = self.qin.get()
        assert confirm

    def load(self, conn=None):
        res = self.qin.get()
        self.qout.put(True)
        return res
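# Hedged usage sketch for CommunicationChannels (not in the original code):
# the parent dump()s an object; the child calls set_child() so its qin/qout
# are swapped, load()s the object and confirms receipt back to the parent.
# Assumes `from multiprocessing import Process, Queue` as in the class above.
def _child(chan):
    chan.set_child()
    print("child received:", chan.load())

if __name__ == "__main__":
    chan = CommunicationChannels()
    proc = Process(target=_child, args=(chan,))
    proc.start()
    chan.dump({"answer": 42})   # blocks until the child confirms
    proc.join()
    chan.close()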
def multi_threaded_generator(generator, num_cached=10, num_threads=4):
    queue = MPQueue(maxsize=num_cached)

    # define producer (putting items into queue)
    def producer():
        for item in generator:
            queue.put(item)
            # pretend we are doing some calculations
            # sleep(0.5)
        queue.put("end")

    # start producers (in background processes)
    threads = []
    for _ in xrange(num_threads):
        np.random.seed()
        threads.append(Process(target=producer))
        threads[-1].daemon = True
        threads[-1].start()

    # run as consumer (read items from queue, in current thread)
    # print "starting while"
    item = queue.get()
    while item != "end":
        # print len(item)
        yield item
        item = queue.get()
    queue.close()
def instantiate(self, stream=None): """ Start a local worker process Blocks until the process is up and the center is properly informed """ if self.process and self.process.is_alive(): raise ValueError("Existing process still alive. Please kill first") q = Queue() self.process = Process(target=run_worker, args=(q, self.ip, self.center.ip, self.center.port, self.ncores, self.port, self._given_worker_port, self.local_dir, self.services, self.name)) self.process.daemon = True self.process.start() while True: try: msg = q.get_nowait() if isinstance(msg, Exception): raise msg self.worker_port = msg['port'] assert self.worker_port self.worker_dir = msg['dir'] break except queues.Empty: yield gen.sleep(0.1) logger.info("Nanny %s:%d starts worker process %s:%d", self.ip, self.port, self.ip, self.worker_port) q.close() raise gen.Return('OK')
class CaptureIO(object): def __init__(self, logger, do_capture): self.logger = logger self.do_capture = do_capture self.logging_queue = None self.logging_thread = None self.original_stdio = None def __enter__(self): if self.do_capture: self.original_stdio = (sys.stdout, sys.stderr) self.logging_queue = Queue() self.logging_thread = LogThread(self.logging_queue, self.logger, "info") sys.stdout = LoggingWrapper(self.logging_queue, prefix="STDOUT") sys.stderr = LoggingWrapper(self.logging_queue, prefix="STDERR") self.logging_thread.start() def __exit__(self, *args, **kwargs): if self.do_capture: sys.stdout, sys.stderr = self.original_stdio if self.logging_queue is not None: self.logger.info("Closing logging queue") self.logging_queue.put(None) if self.logging_thread is not None: self.logging_thread.join(10) while not self.logging_queue.empty(): try: self.logger.warning("Dropping log message: %r", self.logging_queue.get()) except Exception: pass self.logging_queue.close() self.logger.info("queue closed")
def private_server():
    l = []
    q = Queue()

    p = Process(target=otp_worker, args=(q,))
    p.start()
    l.append(p)

    p = Process(target=check_aws_cred_worker, args=(q,))
    p.start()
    l.append(p)

    while True:
        try:
            time.sleep(20)
        except:
            traceback.print_exc()
            break

    print('main proc exiting.')
    for _ in l:
        q.put(None)
    q.close()
    for p in l:
        p.join()
class Scheduler(object):
    def __init__(self):
        self.queue = Queue()

    def consume(self, call_back):
        while True:
            task = self.queue.get()
            if task is None:
                self.queue.close()
                break
            time.sleep(0.05)
            print("Queue got task: {}.".format(task))
            call_back(task)

    def produce(self, value):
        time.sleep(random.uniform(0.1, 1.0))
        task = "TSK {}".format(value)
        self.queue.put(task)

    def start(self, call_back, n_tasks=10):
        consumer = Process(target=self.consume, args=(call_back,))
        consumer.start()
        workers = [Process(target=self.produce, args=(i,)) for i in range(n_tasks)]
        for w in workers:
            w.start()
        for w in workers:
            w.join()
        self.queue.put(None)
        consumer.join()
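# Hedged usage sketch for Scheduler (not from the original code): any
# picklable callable works as the callback; here it simply echoes the task.
def handle(task):
    print("Handled:", task)

if __name__ == "__main__":
    Scheduler().start(handle, n_tasks=4)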
def notify_queue(jid, password, notify_jids):
    q = Queue()
    p = Process(target=process, args=(q, jid, password, notify_jids))
    p.start()
    yield q
    q.close()
    p.terminate()
class DataProcess(Process):
    def __init__(self, data_pipeline, **get_batch_kwargs):
        super(DataProcess, self).__init__(name='neuralnilm-data-process')
        self._stop = Event()
        self._queue = Queue(maxsize=3)
        self.data_pipeline = data_pipeline
        self._get_batch_kwargs = get_batch_kwargs

    def run(self):
        batch = self.data_pipeline.get_batch(**self._get_batch_kwargs)
        while not self._stop.is_set():
            try:
                self._queue.put(batch)
            except AssertionError:
                # queue is closed
                break
            batch = self.data_pipeline.get_batch(**self._get_batch_kwargs)

    def get_batch(self, timeout=30):
        if self.is_alive():
            return self._queue.get(timeout=timeout)
        else:
            raise RuntimeError("Process is not running!")

    def stop(self):
        self._stop.set()
        self._queue.close()
        self.terminate()
        self.join()
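# Hedged usage sketch (not from the original code): DummyPipeline stands in
# for the real data pipeline; anything with a get_batch() method works. The
# background process keeps the bounded queue topped up with ready batches.
class DummyPipeline(object):
    def get_batch(self, **kwargs):
        return {"input": [0.0] * 8, "target": [1.0] * 8}

if __name__ == "__main__":
    data_process = DataProcess(DummyPipeline())
    data_process.start()
    print(data_process.get_batch(timeout=30)["target"][:3])
    data_process.stop()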
class WorkerTest(TestCase): """Ensures the worker correctly handles messages """ def setUp(self): self.queue = Queue() self.context = Context() self.socket = self.context.socket(PUSH) self.socket.bind(settings.ZTASK_WORKER_URL) self.worker = WrappedWorker(queue=self.queue) self.worker.start() def tearDown(self): self.worker.terminate() self.context.destroy() def test_exec(self): """Tests executing a task """ uuid = str(uuid4()) self.socket.send_pyobj((uuid,)) self.assertEqual( self.queue.get(), uuid ) self.assertTrue(self.queue.get()) self.queue.close()
class KafkaQueue(object): def __init__(self, client, topic, partitions, producer_config={}, consumer_config={}): """ KafkaQueue a Queue-like object backed by a Kafka producer and some number of consumers Params ====== client: KafkaClient object topic: str, the topic name partitions: list of ints, the partions to consume from producer_config: dict, see below consumer_config: dict, see below Consumer Config =============== consumer_sleep: int, time in milliseconds a consumer should sleep when it reaches the end of a partition. Default is 200 Producer Config =============== producer_timeout: int, time in milliseconds a producer should wait for messages to enqueue for producing. Default is 100 producer_flush_timeout: int, time in milliseconds a producer should allow messages to accumulate before sending to Kafka. Default is 2000 producer_flush_buffer: int, number of messages a producer should allow to accumulate. Default is 500 """ self.in_queue = Queue() self.out_queue = Queue() self.consumers = [] self.barrier = Event() # Initialize and start consumer threads for partition in partitions: consumer = KafkaConsumerProcess(client, topic, partition, self.in_queue, self.barrier, **consumer_config) consumer.start() self.consumers.append(consumer) # Initialize and start producer thread self.producer = KafkaProducerProcess(client, topic, self.out_queue, self.barrier, **producer_config) self.producer.start() # Trigger everything to start self.barrier.set() def get(self, block=True, timeout=None): return self.in_queue.get(block, timeout).payload def put(self, msg, block=True, timeout=None): return self.out_queue.put(msg, block, timeout) def close(self): self.in_queue.close() self.out_queue.close() self.barrier.clear() self.producer.join() for consumer in self.consumers: consumer.join()
def queueManager(numProc, myList, function, *args):
    '''queueManager(numProc, myList, function, *args):
    generic function used to start worker processes via the multiprocessing Queue object

    numProc  - number of processors to use
    myList   - a list of objects to be iterated over
    function - target function
    *args    - additional arguments to pass to function

    Return - an unordered list of the results from myList
    '''
    qIn = Queue()
    qOut = JoinableQueue()
    if args:
        arguments = (qIn, qOut,) + args
    else:
        arguments = (qIn, qOut,)
    results = []

    # reduce processor count if proc count > files
    i = 0
    for l in myList:
        qIn.put((i, l))
        i += 1

    for _ in range(numProc):
        # Process.start() returns None, so don't keep its return value
        Process(target=function, args=arguments).start()

    sys.stdout.write("Progress: {:>3}%".format(0))
    curProgress = 0
    lastProgress = 0
    while qOut.qsize() < len(myList):
        #sys.stdout.write("\b\b\b\b{:>3}%".format(int(ceil(100*qOut.qsize()/len(myList)))))
        curProgress = int(ceil(100 * qOut.qsize() / len(myList)))
        if curProgress - lastProgress > 10:
            lastProgress += 10
            sys.stdout.write("\nProgress: {:>3}%".format(lastProgress))
            sys.stdout.flush()
    sys.stdout.write("\nProgress: {:>3}%".format(100))
    #sys.stdout.write("\b\b\b\b{:>3}%".format(100))
    sys.stdout.write("\n")

    for _ in range(len(myList)):
        # indicate done results processing
        results.append(qOut.get())
        qOut.task_done()

    # tell child processes to stop
    for _ in range(numProc):
        qIn.put('STOP')

    orderedRes = [None] * len(results)
    for i, res in results:
        orderedRes[i] = res

    qOut.join()
    qIn.close()
    qOut.close()
    return orderedRes
def bioportal_benchmark(apikey, output_file, threads): metadata = Namespace("http://data.bioontology.org/metadata/") url = 'http://data.bioontology.org/ontologies?apikey=%s' % apikey ontology_graph = Graph() print(url) ontology_list_json = urlopen(url).read() ontology_graph.parse(StringIO(unicode(ontology_list_json)), format="json-ld") ontologies = ontology_graph.query(bioportal_query) w = open(output_file, 'w') writer = csv.DictWriter(w, stat_cols) writer.writeheader() tasks = Queue() finished_tasks = Queue() dl_lock = Semaphore(4) task_count = len(ontologies) def worker(q, finished_tasks, dl_lock): try: while True: stats = q.get() og = Graph() try: try: dl_lock.acquire() og.load(stats['download_url'] + "?apikey=%s" % apikey) finally: dl_lock.release() print(stats['ontology'], stats['id']) ig = to_isomorphic(og) graph_digest = ig.graph_digest(stats) finished_tasks.put(stats) except Exception as e: print('ERROR', stats['id'], e) stats['error'] = str(e) finished_tasks.put(stats) except Empty: pass for i in range(int(threads)): print("Starting worker", i) t = Process(target=worker, args=[tasks, finished_tasks, dl_lock]) t.daemon = True t.start() for ontology, title, download in ontologies: stats = defaultdict(str) stats.update({ "id": ontology, "ontology": title, "download_url": download }) tasks.put(stats) tasks.close() written_tasks = 0 while written_tasks < task_count: stats = finished_tasks.get() # print "Writing", stats['ontology'] writer.writerow(stats) w.flush() written_tasks += 1
def cluster(nworkers=2, nanny=False, worker_kwargs={}): if nanny: _run_worker = run_nanny else: _run_worker = run_worker scheduler_q = Queue() scheduler = Process(target=run_scheduler, args=(scheduler_q,)) scheduler.daemon = True scheduler.start() sport = scheduler_q.get() workers = [] for i in range(nworkers): q = Queue() fn = '_test_worker-%s' % uuid.uuid1() proc = Process(target=_run_worker, args=(q, sport), kwargs=merge({'ncores': 1, 'local_dir': fn}, worker_kwargs)) workers.append({'proc': proc, 'queue': q, 'dir': fn}) for worker in workers: worker['proc'].start() for worker in workers: worker['port'] = worker['queue'].get() loop = IOLoop() s = rpc(ip='127.0.0.1', port=sport) start = time() try: while True: ncores = loop.run_sync(s.ncores) if len(ncores) == nworkers: break if time() - start > 5: raise Exception("Timeout on cluster creation") yield {'proc': scheduler, 'port': sport}, workers finally: logger.debug("Closing out test cluster") with ignoring(socket.error, TimeoutError, StreamClosedError): loop.run_sync(lambda: disconnect('127.0.0.1', sport), timeout=0.5) scheduler.terminate() scheduler.join(timeout=2) for port in [w['port'] for w in workers]: with ignoring(socket.error, TimeoutError, StreamClosedError): loop.run_sync(lambda: disconnect('127.0.0.1', port), timeout=0.5) for proc in [w['proc'] for w in workers]: with ignoring(Exception): proc.terminate() proc.join(timeout=2) for q in [w['queue'] for w in workers]: q.close() for fn in glob('_test_worker-*'): shutil.rmtree(fn) loop.close(all_fds=True)
class FileWatcher(object): def __init__(self,collector_path,supported_files): self._initialize_members(collector_path,supported_files) def _initialize_members(self,collector_path,supported_files): # initializing observer. event_handler = NewFileEvent(self) self._observer = Observer() self._observer.schedule(event_handler,collector_path) self._collector_path = collector_path self._files_queue = Queue() self._supported_files = supported_files self._logger = logging.getLogger('SPOT.INGEST.WATCHER') self._logger.info("Creating File watcher") self._logger.info("Supported Files: {0}".format(self._supported_files)) def start(self): self._logger.info("Watching: {0}".format(self._collector_path)) self._observer.start() def new_file_detected(self,file): self._logger.info("-------------------------------------- New File detected --------------------------------------") self._logger.info("File: {0}".format(file)) # Validate the file is supported. collected_file_parts = file.split("/") collected_file = collected_file_parts[len(collected_file_parts) -1 ] if (collected_file.endswith(tuple(self._supported_files)) or collected_file.startswith(tuple(self._supported_files)) ) and not ".current" in collected_file: self._files_queue.put(file) self._logger.info("File {0} added to the queue".format(file)) else: self._logger.warning("File extension not supported: {0}".format(file)) self._logger.warning("File won't be ingested") self._logger.info("------------------------------------------------------------------------------------------------") def stop(self): self._logger.info("Stopping File Watcher") self._files_queue.close() while not self._files_queue.empty(): self._files_queue.get() self._observer.stop() self._observer.join() def GetNextFile(self): return self._files_queue.get() @property def HasFiles(self): return not self._files_queue.empty()
def test_transaction_large(self) -> None:
    queue = Queue()  # type: Queue[str]
    msg = 't' * 100001  # longer than the max read size of 100_000
    p = Process(target=server, args=(msg, queue), daemon=True)
    p.start()
    connection_name = queue.get()
    with IPCClient(connection_name, timeout=1) as client:
        assert client.read() == msg.encode()
        client.write(b'test')
    queue.close()
    queue.join_thread()
    p.join()
def main(args):
    # Get the run time from the command line and convert it to a floating point.
    runTime = float(args[2])
    endTime = time.time() + runTime

    # Initialize queue and other variables
    q = Queue()
    timeLeft = runTime
    numProcs = mp.cpu_count()
    highestPrimeList = []     # List of higher primes to hold as they are taken from the queue.
    calcRange = int(args[1])  # This adjusts the range of numbers to be computed for each process.
    primesCalculated = 0
    whileCounter = 0
    highestPrime = 0

    # Create background processes to calculate primes until they receive a signal
    procs = []
    for i in range(0, numProcs):
        print(i)
        p = Process(target=findPrimes, args=(calcRange * i, calcRange, q, numProcs, endTime))
        p.start()
        procs.append(p)

    while timeLeft > 0:
        print()
        print("There are: " + str(timeLeft) + " seconds left to find a higher prime.")
        print()
        time.sleep(1)
        timeLeft -= 1

    while not q.empty():
        highestPrimeList.append(q.get())

    primesCalculated = len(highestPrimeList)

    # Count back the number of different processes used to ensure the highest prime from
    # any one of the processes is the highest overall prime number.
    while numProcs > whileCounter:
        if highestPrimeList[primesCalculated - whileCounter - 1] > highestPrime:
            highestPrime = highestPrimeList[primesCalculated - whileCounter - 1]
        whileCounter += 1

    print()
    print("Time is up!")
    print()
    print("The Highest prime found in the given time was: " + str(highestPrime))
    print()

    # Clean up all of my child processes (joining the same last process
    # repeatedly, as before, left the other workers unjoined).
    for p in procs:
        p.join()
    q.close()
class Multithreaded_Generator(object): def __init__(self, generator, num_processes, num_cached): self.generator = generator self.num_processes = num_processes self.num_cached = num_cached self._queue = None self._threads = [] self.__end_ctr = 0 def __iter__(self): return self def next(self): if self._queue is None: self._start() item = self._queue.get() while item == "end": self.__end_ctr += 1 if self.__end_ctr == self.num_processes: self._finish() raise StopIteration item = self._queue.get() return item def _start(self): self._queue = MPQueue(self.num_cached) def producer(queue, generator): try: for item in generator: queue.put(item) except: self._finish() print "oops...", sys.exc_info()[0] finally: queue.put("end") for _ in xrange(self.num_processes): np.random.seed() self._threads.append(Process(target=producer, args=(self._queue, self.generator))) self._threads[-1].daemon = True self._threads[-1].start() def _finish(self): if len(self._threads) != 0: self._queue.close() for thread in self._threads: if thread.is_alive(): thread.terminate() self._threads = [] self._queue = None self.__end_ctr = 0
class Layer2Worker():
    def __init__(self):
        self.queue = Queue()

    # close and join_thread queue
    def Qflush(self):
        while True:
            time.sleep(comm.FLUSH_TIMEOUT)
            if self.queue.qsize() == 0:
                break
        self.queue.close()
        self.queue.join_thread()

    def saveProcess(self, process):
        self.process = process
def main():
    # build_proxy()
    queue = Queue(2048)
    for i in range(128):
        Process(target=retrieve_from_queue, args=(queue,)).start()

    with open('samples.log') as f:
        process(f, queue)

    queue.close()
    queue.join_thread()
    # pool.close()
    # pool.join()
def test_nrpe_poller(self): mod = nrpe_poller.Nrpe_poller(modconf) sl = get_instance(mod) # Look if we really change our commands print sl.__dict__ sl.id = 1 sl.i_am_dying = False to_queue = Queue() manager = Manager() from_queue = manager.list() control_queue = Queue() # We prepare a check in the to_queue status = 'queue' command = "$USER1$/check_nrpe -H localhost33 -n -u -t 1 -c check_load3 -a 20"# -a arg1 arg2 arg3" ref = None t_to_to = time.time() c = Check(status, command, ref, t_to_to) msg = Message(id=0, type='Do', data=c) to_queue.put(msg) # The worker will read a message by loop. We want it to # do 2 loops, so we fake a message, adn the Number 2 is a real # exit one msg1 = Message(id=0, type='All is good, continue') msg2 = Message(id=0, type='Die') control_queue.put(msg1) for _ in xrange(1, 2): control_queue.put(msg1) #control_queue.put(msg1) #control_queue.put(msg1) #control_queue.put(msg1) #control_queue.put(msg1) control_queue.put(msg2) sl.work(to_queue, from_queue, control_queue) o = from_queue.pop() print "O", o print o.__dict__ self.assert_(o.status == 'done') self.assert_(o.exit_status == 2) to_queue.close() control_queue.close()
def run(self): """ Called by the thread, it runs the process. NEVER call this method directly. Instead call start() to start the thread. Before finishing the thread using this thread, call join() """ queue = Queue() process = UploadProcess(self._connection_settings, self._room, queue, self._files) if self._data: process.add_data(self._data) process.start() if not process.is_alive(): return self._uploading = True done = False while not self._abort and not done: if not process.is_alive(): self._abort = True break messages = None try: data = queue.get() if not data: done = True if self._finished_callback: self._finished_callback() elif isinstance(data, tuple): sent, total = data if self._progress_callback: self._progress_callback(sent, total) else: self._abort = True if self._error_callback: self._error_callback(data, self._room) except Empty: time.sleep(0.5) self._uploading = False if self._abort and not process.is_alive() and self._error_callback: self._error_callback(Exception("Upload process was killed"), self._room) queue.close() if process.is_alive(): queue.close() process.terminate() process.join()
def test_queue():
    q = Queue()
    #procLst=[Process(target=p, args=(q,) ) for p in [prod1, prod2, consum1, consum2]]
    pLst = [Process(target=p, args=(q,)) for p in [prod1, prod2]]
    cLst = [Process(target=p, args=(q,)) for p in [consum1, consum2]]
    procLst = pLst + cLst
    for pp in procLst:
        pp.start()
    # for pp in pLst:
    #     pp.join()
    # q.put('STOP')
    q.close()
    # print 'Queue is closed'
    q.join_thread()
class Director(object): def __init__(self, producer, consumer): self.producer = producer self.consumer = consumer self.queue = Queue() self.prod_proc = Process(target = self.produce) self.prod_proc.daemon = True self.lock = Lock() self.done = Value('b') self.done.value = 0 def start(self): self.prod_proc.start() def step(self): self.lock.acquire() done = (self.done.value != 0) self.lock.release() if done: raise Done try: data = self.queue.get(block = True, timeout = 1.0) self.consumer.consume(data) except Empty: pass def stop(self): self.prod_proc.join() def run(self): self.start() while True: try: self.step() except Done: break self.stop() def produce(self): try: while True: data = self.producer.produce() self.queue.put(data) except: self.lock.acquire() self.done.value = 1 self.lock.release() self.queue.close() self.queue.join_thread()
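# Hedged usage sketch for Director (not part of the original code): the
# producer raises once its items run out, which the bare except in produce()
# turns into the "done" flag; the consumer just prints whatever it receives.
class CountProducer(object):
    def __init__(self, n=5):
        self.items = list(range(n))

    def produce(self):
        return self.items.pop(0)   # raises IndexError when exhausted

class PrintConsumer(object):
    def consume(self, data):
        print("consumed:", data)

if __name__ == "__main__":
    Director(CountProducer(), PrintConsumer()).run()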
class AqdbManager(SyncManager): """ Manages parallel processing upgrade queue """ def __init__(self, timeout=120): #FIXME: fill in your connect string here #self._cstr = 'oracle://*****:*****@LNTO_AQUILON_NY' self._cstr = '' assert self._cstr, 'Add a database connection string in line 147' self.timeout = timeout self.NUMBER_OF_PROCESSES = cpu_count() if self.NUMBER_OF_PROCESSES < 4: self.NUMBER_OF_PROCESSES = 4 self.host_q = Queue() self.si_cache = get_si_cache(self._cstr) self.os_cache = get_one_os_cache(self._cstr) def start(self): print "starting %d workers" % self.NUMBER_OF_PROCESSES self.workers = [Process( target=work, args=(self._cstr, self.host_q, self.os_cache, self.si_cache)) for i in xrange(self.NUMBER_OF_PROCESSES)] for w in self.workers: w.start() enqueue_hosts(self._cstr, self.host_q) #, 500) for w in self.workers: w.join(self.timeout) for w in self.workers: w.terminate() #run post processing if post_processing(self._cstr, self.os_cache): print """ All hosts have an operating system, and all build items processed successfully. Complete the schema migration by executing the post_os_upgrade.sql script. """ def stop(self): self.host_q.put(None) for w in self.workers: w.join(self.timeout) #w.terminate() self.host_q.close()
class ManagerProcess(BaseProcess): """ A manager process that uses worker processes to process a collection of documents. """ def __init__(self, nr_procs: int = 1): """ Initialises the worker processes and their queues. :param nr_procs: The number of processes that are to be used in total, including the manager. """ super().__init__() self.document_queue = Queue() self.result_queue = Queue() self.processes = [ WorkerProcess(self.document_queue, self.result_queue, i + 1) for i in range(nr_procs - 1) ] def run(self, filename: str) -> None: """ Processes a CSV file of documents using a number of worker processes. :param filename: The name of the CSV file; relative to the directory from where the code is being run. """ try: # Start the worker processes for proc in self.processes: proc.process.start() nr_procs = len(self.processes) + 1 # Simply call the runner of the `BaseProcess` subclass if there are no workers if nr_procs == 1: BaseProcess.run(self, filename) return # Distribute the documents by simply putting them in a queue debug("Process 0 has started.") for index, data in enumerate(read_csv(filename)): if self.document_queue.qsize() < 3 * nr_procs: self.document_queue.put(self.get_data(data)) else: self.process_data(*self.get_data(data), True) debug("Process 0 has finished.") # Signal to the workers that all of the documents have been distributed for _ in range(nr_procs - 1): self.document_queue.put(None) # Collect all of the TF scores returned by the workers debug("Result aggregation has started.") for _ in range(nr_procs - 1): tfidf, data_ids = self.result_queue.get() self.tfidf += tfidf offset = len(self.data_ids) for data_id, document_id in data_ids.items(): self.data_ids[data_id] = offset + document_id self.tfidf.optimise() debug("Result aggregation has finished.") # Wait for all of the workers to finish for proc in self.processes: proc.process.join() except KeyboardInterrupt: self.document_queue.close() self.result_queue.close() debug("Process 0 has been interrupted.")
def BuildStamps(nobjects, config, nproc=1, logger=None, obj_num=0, xsize=0, ysize=0, do_noise=True, make_psf_image=False, make_weight_image=False, make_badpix_image=False): """ Build a number of postage stamp images as specified by the config dict. @param nobjects How many postage stamps to build. @param config A configuration dict. @param nproc How many processes to use. [default: 1] @param logger If given, a logger object to log progress. [default: None] @param obj_num If given, the current obj_num. [default: 0] @param xsize The size of a single stamp in the x direction. [default: 0, which means to look for config.image.stamp_xsize, and if that's not there, use automatic sizing.] @param ysize The size of a single stamp in the y direction. [default: 0, which means to look for config.image.stamp_xsize, and if that's not there, use automatic sizing.] @param do_noise Whether to add noise to the image (according to config['noise']). [default: True] @param make_psf_image Whether to make psf_image. [default: False] @param make_weight_image Whether to make weight_image. [default: False] @param make_badpix_image Whether to make badpix_image. [default: False] @returns the tuple (images, psf_images, weight_images, badpix_images, current_vars). All in tuple are lists. """ config['obj_num'] = obj_num def worker(input, output): proc = current_process().name for job in iter(input.get, 'STOP'): try : (kwargs, obj_num, nobj, info, logger) = job if logger: logger.debug('%s: Received job to do %d stamps, starting with %d', proc,nobj,obj_num) results = [] for k in range(nobj): kwargs['obj_num'] = obj_num + k kwargs['logger'] = logger result = BuildSingleStamp(**kwargs) results.append(result) # Note: numpy shape is y,x ys, xs = result[0].array.shape t = result[5] if logger: logger.info('%s: Stamp %d: size = %d x %d, time = %f sec', proc, obj_num+k, xs, ys, t) output.put( (results, info, proc) ) if logger: logger.debug('%s: Finished job %d -- %d',proc,obj_num,obj_num+nobj-1) except Exception as e: import traceback tr = traceback.format_exc() if logger: logger.error('%s: Caught exception %s\n%s',proc,str(e),tr) output.put( (e, info, tr) ) if logger: logger.debug('%s: Received STOP',proc) # The kwargs to pass to build_func. # We'll be adding to this below... kwargs = { 'xsize' : xsize, 'ysize' : ysize, 'do_noise' : do_noise, 'make_psf_image' : make_psf_image, 'make_weight_image' : make_weight_image, 'make_badpix_image' : make_badpix_image } if nproc > nobjects: if logger: logger.warn( "Trying to use more processes than objects: image.nproc=%d, "%nproc + "nobjects=%d. Reducing nproc to %d."%(nobjects,nobjects)) nproc = nobjects if nproc <= 0: # Try to figure out a good number of processes to use try: from multiprocessing import cpu_count ncpu = cpu_count() if ncpu > nobjects: nproc = nobjects else: nproc = ncpu if logger: logger.info("ncpu = %d. Using %d processes",ncpu,nproc) except: if logger: logger.warn("config.image.nproc <= 0, but unable to determine number of cpus.") nproc = 1 if logger: logger.info("Unable to determine ncpu. Using %d processes",nproc) if nproc > 1: from multiprocessing import Process, Queue, current_process from multiprocessing.managers import BaseManager # Initialize the images list to have the correct size. # This is important here, since we'll be getting back images in a random order, # and we need them to go in the right places (in order to have deterministic # output files). So we initialize the list to be the right size. 
images = [ None for i in range(nobjects) ] psf_images = [ None for i in range(nobjects) ] weight_images = [ None for i in range(nobjects) ] badpix_images = [ None for i in range(nobjects) ] current_vars = [ None for i in range(nobjects) ] # Number of objects to do in each task: # At most nobjects / nproc. # At least 1 normally, but number in Ring if doing a Ring test # Shoot for geometric mean of these two. max_nobj = nobjects / nproc min_nobj = 1 if ( 'gal' in config and isinstance(config['gal'],dict) and 'type' in config['gal'] and config['gal']['type'] == 'Ring' and 'num' in config['gal'] ): min_nobj = galsim.config.ParseValue(config['gal'], 'num', config, int)[0] if max_nobj < min_nobj: nobj_per_task = min_nobj else: import math # This formula keeps nobj a multiple of min_nobj, so Rings are intact. nobj_per_task = min_nobj * int(math.sqrt(float(max_nobj) / float(min_nobj))) # The logger is not picklable, se we set up a proxy object. See comments in process.py # for more details about how this works. class LoggerManager(BaseManager): pass if logger: logger_generator = galsim.utilities.SimpleGenerator(logger) LoggerManager.register('logger', callable = logger_generator) logger_manager = LoggerManager() logger_manager.start() # Set up the task list task_queue = Queue() for k in range(0,nobjects,nobj_per_task): import copy kwargs1 = copy.copy(kwargs) kwargs1['config'] = galsim.config.CopyConfig(config) if logger: logger_proxy = logger_manager.logger() else: logger_proxy = None nobj1 = min(nobj_per_task, nobjects-k) task_queue.put( ( kwargs1, obj_num+k, nobj1, k, logger_proxy ) ) # Run the tasks # Each Process command starts up a parallel process that will keep checking the queue # for a new task. If there is one there, it grabs it and does it. If not, it waits # until there is one to grab. When it finds a 'STOP', it shuts down. done_queue = Queue() p_list = [] for j in range(nproc): # The name is actually the default name for the first time we do this, # but after that it just keeps incrementing the numbers, rather than starting # over at Process-1. As far as I can tell, it's not actually spawning more # processes, so for the sake of the info output, we name the processes # explicitly. p = Process(target=worker, args=(task_queue, done_queue), name='Process-%d'%(j+1)) p.start() p_list.append(p) # In the meanwhile, the main process keeps going. We pull each set of images off of the # done_queue and put them in the appropriate place in the lists. # This loop is happening while the other processes are still working on their tasks. # You'll see that these logging statements get print out as the stamp images are still # being drawn. for i in range(0,nobjects,nobj_per_task): results, k0, proc = done_queue.get() if isinstance(results,Exception): # results is really the exception, e # proc is really the traceback if logger: logger.error('Exception caught during job starting with stamp %d', k0) logger.error('Aborting the rest of this image') for j in range(nproc): p_list[j].terminate() raise results k = k0 for result in results: images[k] = result[0] psf_images[k] = result[1] weight_images[k] = result[2] badpix_images[k] = result[3] current_vars[k] = result[4] k += 1 if logger: logger.debug('%s: Successfully returned results for stamps %d--%d', proc, k0, k-1) # Stop the processes # The 'STOP's could have been put on the task list before starting the processes, or you # can wait. In some cases it can be useful to clear out the done_queue (as we just did) # and then add on some more tasks. 
We don't need that here, but it's perfectly fine to do. # Once you are done with the processes, putting nproc 'STOP's will stop them all. # This is important, because the program will keep running as long as there are running # processes, even if the main process gets to the end. So you do want to make sure to # add those 'STOP's at some point! for j in range(nproc): task_queue.put('STOP') for j in range(nproc): p_list[j].join() task_queue.close() else : # nproc == 1 images = [] psf_images = [] weight_images = [] badpix_images = [] current_vars = [] for k in range(nobjects): kwargs['config'] = config kwargs['obj_num'] = obj_num+k kwargs['logger'] = logger result = BuildSingleStamp(**kwargs) images += [ result[0] ] psf_images += [ result[1] ] weight_images += [ result[2] ] badpix_images += [ result[3] ] current_vars += [ result[4] ] if logger: # Note: numpy shape is y,x ys, xs = result[0].array.shape t = result[5] logger.info('Stamp %d: size = %d x %d, time = %f sec', obj_num+k, xs, ys, t) if logger: logger.debug('image %d: Done making stamps',config.get('image_num',0)) return images, psf_images, weight_images, badpix_images, current_vars
class Producer(object): """ Abstract base class for all production activities Manages a process and its inbound/outbound queues. Child classes should implement: * handle_message: receive messages from the host application * production_step: do the next production step for this process """ __metaclass__ = ABCMeta def __init__(self, buffer_size=None): """ Args: * buffer_size: how many outbound productions to cache. If buffer_size is None, will continue producing for all time If buffer_size is an integer, it will fill the outbound queue with exactly that many items. It will only produce again when the queue drops under the buffer size """ self.process = None self.inbound = Queue() if buffer_size is None: self.outbound = Queue() else: self.outbound = Queue(maxsize=buffer_size) self._did_start = False self._exit = Event() def _shutdown(self): self.inbound.close() self.outbound.close() self._exit.set() @abstractmethod def handle_message(self, msg): """Handle an inbound message from the host application""" pass @abstractmethod def production_step(self): """Produce the next step in the output sequence""" pass def run(self, inbound, outbound): """ The "run step" for this process. Handles inbound messages, and generating production steps Args: * inbound: the inbound message queue, which can send commands to the process. If a STOP_MSG item is sent, the process terminates * outbound: the outbound production queue- the output NB: I tried having these as `self` accesses, and not parameters, but it seems like the queues wouldn't get populated. """ while not self._exit.is_set(): while not inbound.empty(): msg = inbound.get_nowait() try: self.handle_message(msg) except Exception as e: outbound.put(MessageHandlingError(e)) if not outbound.full(): try: outbound.put(self.production_step()) except Exception as e: outbound.put(ProductionStepError(e)) def start(self): """ Start the child production process """ if self._did_start: raise AlreadyStartedError() self.process = Process(target=self.run, args=(self.inbound, self.outbound)) self._did_start = True self.process.start() def stop(self): """ Send a stop message to end the child process. The child process will take this to shutdown gracefully. """ if self._did_start: self._shutdown() self.process.join(0.01) self.process.terminate() def send(self, msg): """ Send a message to the child process Args: msg: whatever arbitrary data the child process wishes to handle """ self.inbound.put_nowait(msg) def get(self, timeout=0.01): """ Return the next message in the outbound queue. If that message contains an exception, raises the exception instead. If the process hasn't been started, starts the process instead. """ if not self._did_start: raise NotStartedException() res = self.outbound.get(timeout=timeout) if isinstance(res, ProductionError): raise res return res
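# Hedged sketch of a concrete Producer subclass (not from the original code):
# it emits an incrementing counter and lets the host reset it via send().
class CounterProducer(Producer):
    def __init__(self, buffer_size=8):
        super(CounterProducer, self).__init__(buffer_size=buffer_size)
        self.value = 0

    def handle_message(self, msg):
        if msg == "reset":
            self.value = 0

    def production_step(self):
        self.value += 1
        return self.value

if __name__ == "__main__":
    producer = CounterProducer()
    producer.start()
    print([producer.get(timeout=1) for _ in range(3)])  # e.g. [1, 2, 3]
    producer.send("reset")
    producer.stop()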
def __run_test(self, transport_bundle, exporter_factory, importer_factory, test_kwargs=True): """ Runs a remote service call test :param transport_bundle: Transport implementation bundle to use :param exporter_factory: Name of the RS exporter factory :param importer_factory: Name of the RS importer factory :param test_kwargs: Test keyword arguments :raise queue.Empty: Peer took to long to answer :raise ValueError: Test failed """ # Define components components = [(exporter_factory, "rs-exporter"), (importer_factory, "rs-importer")] # Start the remote framework print("Starting...") status_queue = Queue() peer = WrappedProcess(target=export_framework, args=(status_queue, APP_ID, transport_bundle, components)) peer.start() try: # Wait for the ready state state = status_queue.get(5) self.assertEqual(state, "ready") # Load the local framework (after the fork) framework = load_framework(APP_ID, transport_bundle, components) context = framework.get_bundle_context() # Look for the remote service for _ in range(30): svc_ref = context.get_service_reference(SVC_SPEC) if svc_ref is not None: break time.sleep(.5) else: self.fail("Remote Service not found") # Get it svc = context.get_service(svc_ref) # Dummy call result = svc.dummy() state = status_queue.get(10) self.assertEqual(state, "call-dummy") self.assertIsNone(result, "Dummy didn't returned None: {0}".format(result)) # Echo call for value in (None, "Test", 42, [1, 2, 3], {"a": "b"}): result = svc.echo(value) # Check state state = status_queue.get(10) self.assertEqual(state, "call-echo") # Check result self.assertEqual(result, value) if test_kwargs: # Keyword arguments sample_text = "SomeSampleText" # Test as-is with default arguments result = svc.keywords(text=sample_text) state = status_queue.get(10) self.assertEqual(state, "call-keyword") self.assertEqual(result, sample_text.upper()) # Test with keywords in the same order as positional arguments result = svc.keywords(text=sample_text, to_lower=True) state = status_queue.get(10) self.assertEqual(state, "call-keyword") self.assertEqual(result, sample_text.lower()) result = svc.keywords(text=sample_text, to_lower=False) state = status_queue.get(10) self.assertEqual(state, "call-keyword") self.assertEqual(result, sample_text.upper()) # Test with keywords in a different order # than positional arguments result = svc.keywords(to_lower=True, text=sample_text) state = status_queue.get(10) self.assertEqual(state, "call-keyword") self.assertEqual(result, sample_text.lower()) # Exception handling try: svc.error() except pelix.remote.RemoteServiceError: # The error has been propagated state = status_queue.get(10) self.assertEqual(state, "call-error") else: self.fail("No exception raised calling 'error'") # Call undefined method self.assertRaises(Exception, svc.undefined) try: # Stop the peer svc.stop() except pelix.remote.RemoteServiceError: # Exception can occur because the peer is disconnected from # MQTT before the call result is received pass # Wait for the peer to stop state = status_queue.get(10) self.assertEqual(state, "stopping") # Wait a bit more, to let coverage save its files time.sleep(.1) finally: # Stop everything (and delete the framework in any case FrameworkFactory.delete_framework() peer.terminate() status_queue.close()
class SubprocVecEnv(object): def __init__(self, env_fns, menv): """ envs: list of gym environments to run in subprocesses """ self.menv = menv # env number in send buffer self.num_envs = len(env_fns) # all env in sample buffer self.closed = False nenvs = len(env_fns) self.nenvs = nenvs env_queues = [Queue() for _ in range(nenvs)] self.shared_queue = Queue() self.ps = [ Process(target=_worker, args=(env_queues[i], self.shared_queue, CloudpickleWrapper(env_fns[i]))) for i in range(nenvs) ] for p in self.ps: # if the main process crashes, we should not cause things to hang p.daemon = True p.start() self.env_queues = dict() for p, queue in zip(self.ps, env_queues): self.env_queues[p.pid] = queue self.current_pids = None def _step_async(self, actions): """ Tell all the environments to start taking a step with the given actions. Call step_wait() to get the results of the step. You should not call this if a step_async run is already pending. """ for pid, action in zip(self.current_pids, actions): self.env_queues[pid].put(('step', action)) def _step_wait(self): """ Wait for the step taken with step_async(). Returns (obs, rews, dones, infos): - obs: an array of observations, or a tuple of arrays of observations. - rews: an array of rewards - dones: an array of "episode done" booleans - infos: a sequence of info objects """ results = [] self.current_pids = [] while len(results) < self.menv: data, pid = self.shared_queue.get() results.append(data) self.current_pids.append(pid) obs, rews, dones, infos = zip(*results) return np.stack(obs), np.stack(rews), np.stack(dones), infos def reset(self): """ Reset all the environments and return an array of observations, or a tuple of observation arrays. If step_async is still doing work, that work will be cancelled and step_wait() should not be called until step_async() is invoked again. """ for queue in self.env_queues.values(): # initialize all queue.put(('reset', None)) results = [] self.current_pids = [] while len(results) < self.menv: data, pid = self.shared_queue.get() results.append(data[0]) self.current_pids.append(pid) return np.stack(results) def close(self): if self.closed: return for queue in self.env_queues.values(): queue.put(('close', None)) self.shared_queue.close() for p in self.ps: p.join() self.closed = True def __len__(self): return self.nenvs def step(self, actions): self._step_async(actions) return self._step_wait()
class CallbackTask(object): QUEUE_SIZE = 4000 def __init__(self, config): """ Arguments: config: flask app config """ self._input_q = Queue(CallbackTask.QUEUE_SIZE) settings = {'DB_URI': config['SQLALCHEMY_DATABASE_URI']} settings.update(config['CALLBACK_CONF']) self._task = Process(target=CallbackTask._task_func, args=(self._input_q, settings)) self._task.start() @staticmethod def _task_func(input_q, settings): """ Argumenst: input_q (multiprocessing.Queue): command input () settings (dict): """ logger.debug("Task running") # We need to create a new DB session for the process, the existing # one can only be used by flask main process and its threads db_session = configure_db(settings['DB_URI']) # CallbackManager handles all the logic callback_manager = CallbackManager( db_session=db_session, retries=settings['RETRIES'], retry_period=settings['RETRY_PERIOD'], nthreads=settings['NTHREADS']) # Main dispatch loop while True: try: cmd, data = input_q.get(timeout=1) if cmd == NEW_CALLBACK: # data: <commands.CallbackData> data = pickle.loads(data) logger.debug("New Callback (id: {})".format(data.id)) callback_manager.new_callback(data) elif cmd == ACK_CALLBACK: # data: <commands.CallbackData.id> -> string logger.debug("Ack Callback (id: {})".format(data)) callback_manager.ack_callback(data) elif cmd == EXIT_TASK: callback_manager.close() break else: logger.debug("Unknown command {}".format(cmd)) except queue.Empty: continue input_q.close() exit(0) def new_callback(self, callback_data): self._input_q.put((NEW_CALLBACK, pickle.dumps(callback_data))) def ack_callback(self, callback_id): self._input_q.put((ACK_CALLBACK, callback_id)) def close(self): self._input_q.put((EXIT_TASK, None)) self._input_q.close() self._task.join()
class Profiling: _current_queue = None def pool_args(self): return { "initializer": Profiling.init_child, "initargs": (self.collecting_q, ) } def __init__(self): self.result_q = Queue() self.collecting_q = Queue() def __enter__(self): self.reader = Process( target=Profiling.read_profiling_result, args=( self.collecting_q, self.result_q, ), ) self.reader.start() def __exit__(self, exc_type, exc_value, traceback): self.collecting_q.put(None) self.collecting_q.close() self.summary = self.result_q.get() self.result_q.close() self.reader.join() def measure(section): def decorator(fn): @wraps(fn) def with_profiling(*args, **kwargs): q = Profiling._current_queue if q is None: return fn(*args, **kwargs) start_time = time.time() error = None try: ret = fn(*args, **kwargs) except Exception as e: error = e elapsed_time = time.time() - start_time q.put_nowait((section, elapsed_time)) if error is None: return ret else: raise error return with_profiling return decorator def print_summary(self, keys=None): if not hasattr(self, 'summary'): return summary = self.summary keys = keys if keys is not None else sorted(summary.keys()) datatable = [[ "Section", "Count", "Min", "Avg", "50%", "80%", "95%", "Max" ]] for section in keys: stats = summary[section] datatable.append([ section, stats["count"], stats["min"], stats["avg"], stats["median"], stats["p80"], stats["p95"], stats["max"], ]) table = AsciiTable(datatable) print(table.table) def read_profiling_result(collecting_q, result_q): sink = ProfilerSink() for section, duration in iter(collecting_q.get, None): sink.append(section, duration) result_q.put(sink.summary()) def init_child(queue): Profiling._current_queue = queue
class Dataset(): def __init__(self, path=None): self.num_classes = None self.classes = None self.images = None self.labels = None self.features = None self.index_queue = None self.queue_idx = None self.batch_queue = None self.is_typeB = None if path is not None: self.init_from_path(path) def init_from_path(self, path): path = os.path.expanduser(path) _, ext = os.path.splitext(path) if os.path.isdir(path): self.init_from_folder(path) elif ext == '.txt': self.init_from_list(path) else: raise ValueError('Cannot initialize dataset from path: %s\n\ It should be either a folder or a .txt list file' % path) print('%d images of %d classes loaded' % (len(self.images), self.num_classes)) def init_from_folder(self, folder): folder = os.path.expanduser(folder) class_names = os.listdir(folder) class_names.sort() classes = [] images = [] labels = [] for label, class_name in enumerate(class_names): classdir = os.path.join(folder, class_name) if os.path.isdir(classdir): images_class = os.listdir(classdir) images_class.sort() images_class = [ os.path.join(classdir, img) for img in images_class ] indices_class = np.arange(len(images), len(images) + len(images_class)) classes.append(DataClass(class_name, indices_class, label)) images.extend(images_class) labels.extend(len(images_class) * [label]) self.classes = np.array(classes, dtype=np.object) self.images = np.array(images, dtype=np.object) self.labels = np.array(labels, dtype=np.int32) self.num_classes = len(classes) def init_from_list(self, filename): with open(filename, 'r') as f: lines = f.readlines() lines = [line.strip().split(' ') for line in lines] assert len(lines)>0, \ 'List file must be in format: "fullpath(str) label(int)"' images = [line[0] for line in lines] if len(lines[0]) > 1: labels = [int(line[1]) for line in lines] else: labels = [os.path.dirname(img) for img in images] _, labels = np.unique(labels, return_inverse=True) self.images = np.array(images, dtype=np.object) self.labels = np.array(labels, dtype=np.int32) self.init_classes() def init_classes(self): dict_classes = {} classes = [] for i, label in enumerate(self.labels): if not label in dict_classes: dict_classes[label] = [i] else: dict_classes[label].append(i) for label, indices in dict_classes.items(): classes.append(DataClass(str(label), indices, label)) self.classes = np.array(classes, dtype=np.object) self.num_classes = len(classes) def separate_AB(self): assert type(self.images[0]) is str self.is_typeB = np.zeros(len(self.images), dtype=np.bool) for c in self.classes: # Find the index of type A file c.indices_A = [ i for i in c.indices if not is_typeB(self.images[i]) ] assert len(c.indices_A) >= 1, str(self.images[c.indices]) # Find the index of type B file c.indices_B = [i for i in c.indices if is_typeB(self.images[i])] assert len(c.indices_B) >= 1, str(self.images[c.indices]) self.is_typeB[c.indices_B] = True print('type A images: %d type B images: %d' % (np.sum(~self.is_typeB), np.sum(self.is_typeB))) # Data Loading def init_index_queue(self): if self.index_queue is None: self.index_queue = Queue() index_queue = np.random.permutation(len(self.images))[:, None] for idx in list(index_queue): self.index_queue.put(idx) def get_batch(self, batch_size, batch_format): ''' Get the indices from index queue and fetch the data with indices.''' indices_batch = [] if batch_format == 'random_sample': while len(indices_batch) < batch_size: indices_batch.extend( self.index_queue.get(block=True, timeout=30)) assert len(indices_batch) == batch_size elif batch_format == 'random_pair': assert 
batch_size % 2 == 0 classes = np.random.permutation(self.classes)[:batch_size // 2] indices_batch = np.concatenate([c.random_pair() for c in classes], axis=0) elif batch_format == 'random_AB_pair': assert batch_size % 2 == 0 classes = np.random.permutation(self.classes)[:batch_size // 2] indices_batch = np.concatenate( [c.random_AB_pair() for c in classes], axis=0) else: raise ValueError( 'get_batch: Unknown batch_format: {}!'.format(batch_format)) batch = {} if len(indices_batch) > 0: batch['images'] = self.images[indices_batch] batch['labels'] = self.labels[indices_batch] if self.is_typeB is not None: batch['is_typeB'] = self.is_typeB[indices_batch] return batch # Multithreading preprocessing images def start_index_queue(self): self.index_queue = Queue() def index_queue_worker(): while True: if self.index_queue.empty(): self.init_index_queue() time.sleep(0.01) self.index_worker = Process(target=index_queue_worker) self.index_worker.daemon = True self.index_worker.start() def start_batch_queue(self, config, is_training, maxsize=1, num_threads=4): if self.index_queue is None: self.start_index_queue() self.batch_queue = Queue(maxsize=maxsize) def batch_queue_worker(seed): np.random.seed(seed) while True: batch = self.get_batch(config.batch_size, config.batch_format) if batch is not None: batch['image_paths'] = batch['images'] batch['images'] = preprocess(batch['image_paths'], config, is_training) self.batch_queue.put(batch) self.batch_workers = [] for i in range(num_threads): worker = Process(target=batch_queue_worker, args=(i, )) worker.daemon = True worker.start() self.batch_workers.append(worker) def pop_batch_queue(self): batch = self.batch_queue.get(block=True, timeout=60) return batch def release_queue(self): if self.index_queue is not None: self.index_queue.close() if self.batch_queue is not None: self.batch_queue.close() if self.index_worker is not None: self.index_worker.terminate() del self.index_worker self.index_worker = None if self.batch_workers is not None: for w in self.batch_workers: w.terminate() del w self.batch_workers = None
class LocalOptInterfacer(object): """ This class defines the APOSMM interface to various local optimization routines. Currently supported routines are - NLopt routines ['LN_SBPLX', 'LN_BOBYQA', 'LN_COBYLA', 'LN_NEWUOA', 'LN_NELDERMEAD', 'LD_MMA'] - PETSc/TAO routines ['pounders', 'blmvm', 'nm'] - SciPy routines ['scipy_Nelder-Mead', 'scipy_COBYLA', 'scipy_BFGS'] - DFOLS ['dfols'] - External local optimizer ['external_localopt'] (which use files to pass/receive x/f values) """ def __init__(self, user_specs, x0, f0, grad0=None): """ :param x0: A numpy array of the initial guess solution. This guess should be scaled to a unit cube. :param f0: A numpy array of the initial function value. .. warning:: In order to have correct functioning of the local optimization child processes. ~self.iterate~ should be called immediately after creating the class. """ self.parent_can_read = Event() self.comm_queue = Queue() self.child_can_read = Event() self.x0 = x0.copy() self.f0 = f0.copy() if grad0 is not None: self.grad0 = grad0.copy() else: self.grad0 = None # Setting the local optimization method if user_specs['localopt_method'] in [ 'LN_SBPLX', 'LN_BOBYQA', 'LN_COBYLA', 'LN_NEWUOA', 'LN_NELDERMEAD', 'LD_MMA' ]: run_local_opt = run_local_nlopt elif user_specs['localopt_method'] in ['pounders', 'blmvm', 'nm']: run_local_opt = run_local_tao elif user_specs['localopt_method'] in [ 'scipy_Nelder-Mead', 'scipy_COBYLA', 'scipy_BFGS' ]: run_local_opt = run_local_scipy_opt elif user_specs['localopt_method'] in ['dfols']: run_local_opt = run_local_dfols elif user_specs['localopt_method'] in ['external_localopt']: run_local_opt = run_external_localopt self.parent_can_read.clear() self.process = Process( target=opt_runner, args=(run_local_opt, user_specs, self.comm_queue, x0, f0, self.child_can_read, self.parent_can_read)) self.process.start() self.is_running = True self.parent_can_read.wait() x_new = self.comm_queue.get() if isinstance(x_new, ErrorMsg): raise APOSMMException(x_new.x) assert np.allclose(x_new, x0, rtol=1e-15, atol=1e-15), \ "The first point requested by this run does not match the starting point. Exiting" def iterate(self, data): """ Returns an instance of either :class:`numpy.ndarray` corresponding to the next iterative guess or :class:`ConvergedMsg` when the solver has completed its run. :param x_on_cube: A numpy array of the point being evaluated (for a handshake) :param f: A numpy array of the function evaluation. :param grad: A numpy array of the function's gradient. :param fvec: A numpy array of the function's component values. 
""" self.parent_can_read.clear() if 'grad' in data.dtype.names: self.comm_queue.put((data['x_on_cube'], data['f'], data['grad'])) elif 'fvec' in data.dtype.names: self.comm_queue.put((data['x_on_cube'], data['fvec'])) else: self.comm_queue.put(( data['x_on_cube'], data['f'], )) self.child_can_read.set() self.parent_can_read.wait() x_new = self.comm_queue.get() if isinstance(x_new, ErrorMsg): raise APOSMMException(x_new.x) elif isinstance(x_new, ConvergedMsg): self.process.join() self.comm_queue.close() self.comm_queue.join_thread() self.is_running = False else: x_new = np.atleast_2d(x_new) return x_new def destroy(self, previous_x): while not isinstance(previous_x, ConvergedMsg): self.parent_can_read.clear() if self.grad0 is None: self.comm_queue.put(( previous_x, 0 * np.ones_like(self.f0), )) else: self.comm_queue.put((previous_x, 0 * np.ones_like(self.f0), np.zeros_like(self.grad0))) self.child_can_read.set() self.parent_can_read.wait() previous_x = self.comm_queue.get() assert isinstance(previous_x, ConvergedMsg) self.process.join() self.comm_queue.close() self.comm_queue.join_thread() self.is_running = False
class Sentinel(object): def __init__(self, stop_event, start_event, list_key=Conf.Q_LIST, timeout=Conf.TIMEOUT, start=True): # Make sure we catch signals for the pool signal.signal(signal.SIGINT, signal.SIG_IGN) signal.signal(signal.SIGTERM, signal.SIG_DFL) self.pid = current_process().pid self.parent_pid = os.getppid() self.name = current_process().name self.list_key = list_key self.r = redis_client self.reincarnations = 0 self.tob = timezone.now() self.stop_event = stop_event self.start_event = start_event self.pool_size = Conf.WORKERS self.pool = [] self.timeout = timeout self.task_queue = Queue() self.result_queue = Queue() self.event_out = Event() self.monitor = Process() self.pusher = Process() if start: self.start() def start(self): self.spawn_cluster() self.guard() def status(self): if not self.start_event.is_set() and not self.stop_event.is_set(): return Conf.STARTING elif self.start_event.is_set() and not self.stop_event.is_set(): if self.result_queue.qsize() == 0 and self.task_queue.qsize() == 0: return Conf.IDLE return Conf.WORKING elif self.stop_event.is_set() and self.start_event.is_set(): if self.monitor.is_alive() or self.pusher.is_alive() or len( self.pool) > 0: return Conf.STOPPING return Conf.STOPPED def spawn_process(self, target, *args): """ :type target: function or class """ # This is just for PyCharm to not crash. Ignore it. if not hasattr(sys.stdin, 'close'): def dummy_close(): pass sys.stdin.close = dummy_close p = Process(target=target, args=args) p.daemon = True if target == worker: p.timer = args[2] self.pool.append(p) p.start() return p def spawn_pusher(self): return self.spawn_process(pusher, self.task_queue, self.event_out, self.list_key, self.r) def spawn_worker(self): self.spawn_process(worker, self.task_queue, self.result_queue, Value('b', -1), self.timeout) def spawn_monitor(self): return self.spawn_process(monitor, self.result_queue) def reincarnate(self, process): """ :param process: the process to reincarnate :type process: Process """ if process == self.monitor: self.monitor = self.spawn_monitor() logger.error( _("reincarnated monitor {} after sudden death").format( process.name)) elif process == self.pusher: self.pusher = self.spawn_pusher() logger.error( _("reincarnated pusher {} after sudden death").format( process.name)) else: self.pool.remove(process) self.spawn_worker() if self.timeout and int(process.timer.value) == 0: # only need to terminate on timeout, otherwise we risk destabilizing the queues process.terminate() logger.warn( _("reincarnated worker {} after timeout").format( process.name)) elif int(process.timer.value) == -2: logger.info(_("recycled worker {}").format(process.name)) else: logger.error( _("reincarnated worker {} after death").format( process.name)) self.reincarnations += 1 def spawn_cluster(self): self.pool = [] Stat(self).save() # spawn worker pool for i in range(self.pool_size): self.spawn_worker() # spawn auxiliary self.monitor = self.spawn_monitor() self.pusher = self.spawn_pusher() # set worker cpu affinity if needed if psutil and Conf.CPU_AFFINITY: set_cpu_affinity(Conf.CPU_AFFINITY, [w.pid for w in self.pool]) def guard(self): logger.info( _('{} guarding cluster at {}').format(current_process().name, self.pid)) self.start_event.set() Stat(self).save() logger.info(_('Q Cluster-{} running.').format(self.parent_pid)) scheduler(list_key=self.list_key) counter = 0 # Guard loop. Runs at least once while not self.stop_event.is_set() or not counter: # Check Workers for p in self.pool: # Are you alive? 
if not p.is_alive() or (self.timeout and int(p.timer.value) == 0): self.reincarnate(p) continue # Decrement timer if work is being done if p.timer.value > 0: p.timer.value -= 1 # Check Monitor if not self.monitor.is_alive(): self.reincarnate(self.monitor) # Check Pusher if not self.pusher.is_alive(): self.reincarnate(self.pusher) # Call scheduler once a minute (or so) counter += 1 if counter > 120: counter = 0 scheduler(list_key=self.list_key) # Save current status Stat(self).save() sleep(0.5) self.stop() def stop(self): Stat(self).save() name = current_process().name logger.info('{} stopping cluster processes'.format(name)) # Stopping pusher self.event_out.set() # Wait for it to stop while self.pusher.is_alive(): sleep(0.2) Stat(self).save() # Put poison pills in the queue for _ in range(len(self.pool)): self.task_queue.put('STOP') self.task_queue.close() # wait for the task queue to empty self.task_queue.join_thread() # Wait for all the workers to exit while len(self.pool): for p in self.pool: if not p.is_alive(): self.pool.remove(p) sleep(0.2) Stat(self).save() # Finally stop the monitor self.result_queue.put('STOP') self.result_queue.close() # Wait for the result queue to empty self.result_queue.join_thread() logger.info('{} waiting for the monitor.'.format(name)) # Wait for everything to close or time out count = 0 if not self.timeout: self.timeout = 30 while self.status() == Conf.STOPPING and count < self.timeout * 5: sleep(0.2) Stat(self).save() count += 1 # Final status Stat(self).save()
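# --- Illustrative sketch (not from the original source) ---
# Sentinel.stop() above shuts its pool down by pushing one 'STOP' poison pill per worker,
# then closing the task queue and join_thread()-ing it so every pill is flushed before the
# monitor is stopped the same way.  A minimal, self-contained version of that pattern
# (the worker body and task values are made up) looks like this:
from multiprocessing import Process, Queue


def worker(task_queue, result_queue):
    for task in iter(task_queue.get, 'STOP'):   # exit on the poison pill
        result_queue.put(task * 2)


if __name__ == '__main__':
    task_queue, result_queue = Queue(), Queue()
    pool = [Process(target=worker, args=(task_queue, result_queue)) for _ in range(4)]
    for p in pool:
        p.start()
    for n in range(10):
        task_queue.put(n)
    for _ in pool:                  # one pill per worker
        task_queue.put('STOP')
    task_queue.close()
    task_queue.join_thread()        # wait until everything is flushed to the pipe
    results = [result_queue.get() for _ in range(10)]
    for p in pool:
        p.join()
    print(sorted(results))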
def MultiProcess(nproc, config, job_func, tasks, item, logger=None, done_func=None, except_func=None, except_abort=True): """A helper function for performing a task using multiprocessing. A note about the nomenclature here. We use the term "job" to mean the job of building a single file or image or stamp. The output of each job is gathered into the list of results that is returned. A task is a collection of one or more jobs that are all done by the same processor. For simple cases, each task is just a single job, but for things like a Ring test, the task needs to have the jobs for a full ring. The tasks argument is a list of tasks. Each task in that list is a list of jobs. Each job is a tuple consisting of (kwargs, k), where kwargs is the dict of kwargs to pass to the job_func and k is the index of this job in the full list of jobs. @param nproc How many processes to use. @param config The configuration dict. @param job_func The function to run for each job. It will be called as result = job_func(**kwargs) where kwargs is from one of the jobs in the task list. @param tasks A list of tasks to run. Each task is a list of jobs, each of which is a tuple (kwargs, k). @param item A string indicating what is being worked on. @param logger If given, a logger object to log progress. [default: None] @param done_func A function to run upon completion of each job. It will be called as done_func(logger, proc, k, result, t) where proc is the process name, k is the index of the job, result is the return value of that job, and t is the time taken. [default: None] @param except_func A function to run if an exception is encountered. It will be called as except_func(logger, proc, k, ex, tr) where proc is the process name, k is the index of the job that failed, ex is the exception caught, and tr is the traceback. [default: None] @param except_abort Whether an exception should abort the rest of the processing. If False, then the returned results list will not include anything for the jobs that failed. [default: True] @returns a list of the outputs from job_func for each job """ import time # The worker function will be run once in each process. # It pulls tasks off the task_queue, runs them, and puts the results onto the results_queue # to send them back to the main process. # The *tasks* can be made up of more than one *job*. Each job involves calling job_func # with the kwargs from the list of jobs. # Each job also carries with it its index in the original list of all jobs. def worker(task_queue, results_queue, config, logger): proc = current_process().name # The logger object passed in here is a proxy object. This means that all the arguments # to any logging commands are passed through the pipe to the real Logger object on the # other end of the pipe. This tends to produce a lot of unnecessary communication, since # most of those commands don't actually produce any output (e.g. logger.debug(..) commands # when the logging level is not DEBUG). So it is helpful to wrap this object in a # LoggerWrapper that checks whether it is worth sending the arguments back to the original # Logger before calling the functions. 
logger = LoggerWrapper(logger) if 'profile' in config and config['profile']: import cProfile, pstats, StringIO pr = cProfile.Profile() pr.enable() else: pr = None for task in iter(task_queue.get, 'STOP'): try: if logger: logger.debug( '%s: Received job to do %d %ss, starting with %s', proc, len(task), item, task[0][1]) for kwargs, k in task: t1 = time.time() kwargs['config'] = config kwargs['logger'] = logger result = job_func(**kwargs) t2 = time.time() results_queue.put((result, k, t2 - t1, proc)) except KeyboardInterrupt: raise except Exception as e: import traceback tr = traceback.format_exc() if logger: logger.debug('%s: Caught exception: %s\n%s', proc, str(e), tr) results_queue.put((e, k, tr, proc)) if logger: logger.debug('%s: Received STOP', proc) if pr: pr.disable() s = StringIO.StringIO() sortby = 'tottime' ps = pstats.Stats(pr, stream=s).sort_stats(sortby).reverse_order() ps.print_stats() logger.error( "*** Start profile for %s ***\n%s\n*** End profile for %s ***", proc, s.getvalue(), proc) njobs = sum([len(task) for task in tasks]) if nproc > 1: if logger: logger.warn("Using %d processes for %s processing", nproc, item) from multiprocessing import Process, Queue, current_process from multiprocessing.managers import BaseManager # Send the tasks to the task_queue. task_queue = Queue() for task in tasks: task_queue.put(task) # Temporarily mark that we are multiprocessing, so we know not to start another # round of multiprocessing later. config['current_nproc'] = nproc # The logger is not picklable, so we need to make a proxy for it so all the # processes can emit logging information safely. logger_proxy = GetLoggerProxy(logger) # Run the tasks. # Each Process command starts up a parallel process that will keep checking the queue # for a new task. If there is one there, it grabs it and does it. If not, it waits # until there is one to grab. When it finds a 'STOP', it shuts down. results_queue = Queue() p_list = [] for j in range(nproc): # The process name is actually the default name that Process would generate on its # own for the first time we do this. But after that, if we start another round of # multiprocessing, then it just keeps incrementing the numbers, rather than starting # over at Process-1. As far as I can tell, it's not actually spawning more # processes, so for the sake of the logging output, we name the processes explicitly. p = Process(target=worker, args=(task_queue, results_queue, config, logger_proxy), name='Process-%d' % (j + 1)) p.start() p_list.append(p) # In the meanwhile, the main process keeps going. We pull each set of images off of the # results_queue and put them in the appropriate place in the lists. # This loop is happening while the other processes are still working on their tasks. results = [None for k in range(njobs)] for kk in range(njobs): res, k, t, proc = results_queue.get() if isinstance(res, Exception): # res is really the exception, e # t is really the traceback # k is the index for the job that failed if except_func is not None: except_func(logger, proc, k, res, t) if except_abort: for j in range(nproc): p_list[j].terminate() raise res else: # The normal case if done_func is not None: done_func(logger, proc, k, res, t) results[k] = res # Stop the processes # The 'STOP's could have been put on the task list before starting the processes, or you # can wait. In some cases it can be useful to clear out the results_queue (as we just did) # and then add on some more tasks. We don't need that here, but it's perfectly fine to do. 
# Once you are done with the processes, putting nproc 'STOP's will stop them all. # This is important, because the program will keep running as long as there are running # processes, even if the main process gets to the end. So you do want to make sure to # add those 'STOP's at some point! for j in range(nproc): task_queue.put('STOP') for j in range(nproc): p_list[j].join() task_queue.close() # And clear this out, so we know that we're not multiprocessing anymore. config['current_nproc'] = nproc else: # nproc == 1 results = [None] * njobs for task in tasks: for kwargs, k in task: try: t1 = time.time() kwargs['config'] = config kwargs['logger'] = logger result = job_func(**kwargs) t2 = time.time() if done_func is not None: done_func(logger, None, k, result, t2 - t1) results[k] = result except KeyboardInterrupt: raise except Exception as e: import traceback tr = traceback.format_exc() if except_func is not None: except_func(logger, None, k, e, tr) if except_abort: raise # If there are any failures, then there will still be some Nones in the results list. # Remove them. results = [r for r in results if r is not None] return results
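# --- Illustrative sketch (not from the original source) ---
# MultiProcess() above sends (kwargs, k) jobs down a task queue and reassembles the
# results by index k as they come back on results_queue, shipping exceptions to the
# parent instead of letting a worker die silently.  A stripped-down sketch of that
# "tag each job with its index" idea (square() and the job list are invented):
from multiprocessing import Process, Queue, current_process


def square(x):
    return x * x


def worker(task_queue, results_queue):
    proc = current_process().name
    for kwargs, k in iter(task_queue.get, 'STOP'):
        try:
            results_queue.put((square(**kwargs), k, proc))
        except Exception as e:           # send the exception back instead of crashing
            results_queue.put((e, k, proc))


if __name__ == '__main__':
    jobs = [({'x': x}, k) for k, x in enumerate([3, 1, 4, 1, 5, 9])]
    task_queue, results_queue = Queue(), Queue()
    for job in jobs:
        task_queue.put(job)
    procs = [Process(target=worker, args=(task_queue, results_queue)) for _ in range(3)]
    for p in procs:
        p.start()
    results = [None] * len(jobs)
    for _ in jobs:
        res, k, proc = results_queue.get()
        results[k] = res                 # slot the answer into its original position
    for _ in procs:
        task_queue.put('STOP')
    for p in procs:
        p.join()
    print(results)                       # [9, 1, 16, 1, 25, 81]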
async def scanners_runner(scanners_conf: Dict, queue: mp.Queue) -> None: data_api = tradeapi.REST( base_url=config.prod_base_url, key_id=config.prod_api_key_id, secret_key=config.prod_api_secret, ) for scanner_name in scanners_conf: if scanner_name == "momentum": scanner_details = scanners_conf[scanner_name] try: recurrence = scanner_details.get("recurrence", None) target_strategy_name = scanner_details.get( "target_strategy_name", None) scanner_object = Momentum( provider=scanner_details["provider"], data_api=data_api, min_last_dv=scanner_details["min_last_dv"], min_share_price=scanner_details["min_share_price"], max_share_price=scanner_details["max_share_price"], min_volume=scanner_details["min_volume"], from_market_open=scanner_details["from_market_open"], today_change_percent=scanner_details["min_gap"], recurrence=timedelta( minutes=recurrence) if recurrence else None, target_strategy_name=target_strategy_name, max_symbols=scanner_details.get("max_symbols", config.total_tickers), ) tlog(f"instantiated momentum scanner") except KeyError as e: tlog( f"Error {e} in processing of scanner configuration {scanner_details}" ) exit(0) else: tlog(f"custom scanner {scanner_name} selected") scanner_details = scanners_conf[scanner_name] try: spec = importlib.util.spec_from_file_location( "module.name", scanner_details["filename"]) custom_scanner_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(custom_scanner_module) # type: ignore class_name = scanner_name custom_scanner = getattr(custom_scanner_module, class_name) if not issubclass(custom_scanner, Scanner): tlog( f"custom scanner must inherit from class {Scanner.__name__}" ) exit(0) scanner_details.pop("filename") if "recurrence" not in scanner_details: scanner_object = custom_scanner( data_api=data_api, **scanner_details, ) else: recurrence = scanner_details.pop("recurrence") scanner_object = custom_scanner( data_api=data_api, recurrence=timedelta(minutes=recurrence), **scanner_details, ) except Exception as e: tlog( f"[Error] scanners_runner.scanners_runner() for {scanner_name}:{e} " ) scanner_tasks.append( asyncio.create_task(scanner_runner(scanner_object, queue))) try: await asyncio.gather( *scanner_tasks, return_exceptions=True, ) except asyncio.CancelledError: tlog( "scanners_runner.scanners_runner() cancelled, closing scanner tasks" ) for task in scanner_tasks: tlog( f"scanners_runner.scanners_runner() requesting task {task.get_name()} to cancel" ) task.cancel() try: await task except asyncio.CancelledError: tlog( "scanners_runner.scanners_runner() task is cancelled now") finally: queue.close() tlog("scanners_runner.scanners_runner() done.")
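# --- Illustrative sketch (not from the original source) ---
# scanners_runner() above fans several asyncio scanner tasks out over one
# multiprocessing.Queue and cleans that queue up in a finally: block once the tasks are
# gathered or cancelled, while a separate process consumes the picks.  A bare-bones
# version of that shape (fake_scanner, consumer and the pick strings are invented):
import asyncio
import multiprocessing as mp


def consumer(queue: mp.Queue) -> None:
    # separate process that would act on the picks; stops at the None sentinel
    for pick in iter(queue.get, None):
        print("received", pick)


async def fake_scanner(name: str, queue: mp.Queue) -> None:
    for i in range(3):
        await asyncio.sleep(0.1)          # pretend to scan the market
        queue.put(f"{name}-pick-{i}")     # hand results to the consuming process


async def runner(queue: mp.Queue) -> None:
    tasks = [asyncio.create_task(fake_scanner(n, queue)) for n in ("momentum", "custom")]
    try:
        await asyncio.gather(*tasks, return_exceptions=True)
    finally:
        queue.put(None)                   # let the consumer finish
        queue.close()                     # mirror the original finally: clean-up


if __name__ == "__main__":
    q = mp.Queue()
    c = mp.Process(target=consumer, args=(q,))
    c.start()
    asyncio.run(runner(q))
    c.join()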
def worker( parent_conn: Connection, step_queue: Queue, pickled_env_factory: str, worker_id: int, engine_configuration: EngineConfig, ) -> None: env_factory: Callable[[int, List[SideChannel]], UnityEnvironment] = cloudpickle.loads( pickled_env_factory) shared_float_properties = FloatPropertiesChannel() engine_configuration_channel = EngineConfigurationChannel() engine_configuration_channel.set_configuration(engine_configuration) env: BaseEnv = env_factory( worker_id, [shared_float_properties, engine_configuration_channel]) def _send_response(cmd_name, payload): parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload)) def _generate_all_results() -> AllStepResult: all_step_result: AllStepResult = {} for brain_name in env.get_agent_groups(): all_step_result[brain_name] = env.get_step_result(brain_name) return all_step_result def external_brains(): result = {} for brain_name in env.get_agent_groups(): result[brain_name] = group_spec_to_brain_parameters( brain_name, env.get_agent_group_spec(brain_name)) return result try: while True: cmd: EnvironmentCommand = parent_conn.recv() if cmd.name == "step": all_action_info = cmd.payload for brain_name, action_info in all_action_info.items(): if len(action_info.action) != 0: env.set_actions(brain_name, action_info.action) env.step() all_step_result = _generate_all_results() # The timers in this process are independent from all the processes and the "main" process # So after we send back the root timer, we can safely clear them. # Note that we could randomly return timers a fraction of the time if we wanted to reduce # the data transferred. # TODO get gauges from the workers and merge them in the main process too. step_response = StepResponse(all_step_result, get_timer_root()) step_queue.put( EnvironmentResponse("step", worker_id, step_response)) reset_timers() elif cmd.name == "external_brains": _send_response("external_brains", external_brains()) elif cmd.name == "get_properties": reset_params = shared_float_properties.get_property_dict_copy() _send_response("get_properties", reset_params) elif cmd.name == "reset": for k, v in cmd.payload.items(): shared_float_properties.set_property(k, v) env.reset() all_step_result = _generate_all_results() _send_response("reset", all_step_result) elif cmd.name == "close": break except (KeyboardInterrupt, UnityCommunicationException, UnityTimeOutException): logger.info( f"UnityEnvironment worker {worker_id}: environment stopping.") step_queue.put(EnvironmentResponse("env_close", worker_id, None)) finally: # If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process # will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread() # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for # more info. logger.debug(f"UnityEnvironment worker {worker_id} closing.") step_queue.cancel_join_thread() step_queue.close() env.close() logger.debug(f"UnityEnvironment worker {worker_id} done.")
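# --- Illustrative sketch (not from the original source) ---
# The finally: block above calls step_queue.cancel_join_thread() before close() so the
# worker can exit even if the parent never consumes what it already put on the queue
# (without it, a process that exits with unflushed data in the queue's internal buffer
# can block waiting for the feeder thread).  A self-contained toy of that exit path
# (child() and its payload are invented):
from multiprocessing import Process, Queue


def child(q: Queue) -> None:
    try:
        q.put({"payload": list(range(1000))})   # data the parent may never read
    finally:
        q.cancel_join_thread()   # don't block process exit on the buffered item
        q.close()


if __name__ == "__main__":
    q = Queue()
    p = Process(target=child, args=(q,))
    p.start()
    p.join()        # returns even though nothing was ever get() from q
    print("child exited with code", p.exitcode)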
def parmap(f, args, workers=None): ''' evaluates [f(a) for a in args] in parallel if workers is 0 then the built-in map is used. If workers is greater than one then the parent process spawns that many worker processes to evaluate the map. If the *mkl* package is installed then this function first sets the maximum number of allowed threads per process to 1. This is to help prevents spawned subprocesses from using multiple cores. The number of allowed threads is reset after all subprocesses have finished. Parameters ---------- f : callable a : list list of arguments to *f* workers : int, optional number of subprocess to spawn. Defaults to half the available cores plus one ''' if workers is None: # starting_threads is a good estimate for the number of processes # that can be simultaneously running workers = cpu_count() // 2 + 1 if workers < 0: raise ValueError('number of worker processes must be 0 or greater') if workers == 0: # use the built-in sequential map return map(f, args) # make sure that lower level functions are not running in parallel if _HAS_MKL: starting_threads = mkl.get_max_threads() mkl.set_num_threads(1) # q_in has a max size of 1 so that args is not copied over to # the next process until absolutely necessary q_in = Queue(1) q_out = Queue() # any exceptions found by the child processes are put in this queue # and then raised by the parent q_err = Queue() # spawn worker processes procs = [] for i in range(workers): p = Process(target=_f, args=(f, q_in, q_out, q_err)) # process is starting and waiting for something to be put on q_in p.start() procs += [p] submitted_tasks = 0 for a in args: q_in.put((submitted_tasks, a)) submitted_tasks += 1 # indicate that nothing else will be added for i in range(workers): q_in.put(('DONE', None)) # allocate list of Nones and then fill it in with the results val_list = [None for i in range(submitted_tasks)] err_list = [None for i in range(submitted_tasks)] for i in range(submitted_tasks): idx, err = q_err.get() err_list[idx] = err idx, val = q_out.get() val_list[idx] = val # terminate all processes for p in procs: p.join() # close queues q_in.close() q_out.close() q_err.close() # raise an error if any were found if any([e is not None for e in err_list]): raise ParmapError(err_list) # reset the number of threads to its original value if _HAS_MKL: mkl.set_num_threads(starting_threads) return val_list
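# --- Illustrative sketch (not from the original source) ---
# parmap() above expects a worker function _f that pulls (index, arg) pairs from q_in
# until it sees the ('DONE', None) sentinel and reports both an error slot and a value
# for every task.  That helper is not shown in this snippet, so the following is only a
# guess at its shape, consistent with how the three queues are used by the parent:
import traceback


def _f(f, q_in, q_out, q_err):
    while True:
        idx, a = q_in.get()
        if idx == 'DONE':          # sentinel placed once per worker
            break
        try:
            value, err = f(a), None
        except Exception:
            value, err = None, traceback.format_exc()
        q_err.put((idx, err))      # the parent reads q_err first, then q_out
        q_out.put((idx, value))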
def execute(args, job_args): """ Executes a weather/fire simulation. :param args: a dictionary with all to start the simulationfollowing keys :param job_args: a the original json given the forecast Keys in args: :param grid_code: the (unique) code of the grid that is used :param sys_install_path: system installation directory :param start_utc: start time of simulation in UTC :param end_utc: end time of simulation in UTC :param workspace_path: workspace directory :param wps_install_path: installation directory of WPS that will be used :param wrf_install_path: installation directory of WRF that will be used :param grib_source: a string identifying a valid GRIB2 source :param wps_namelist_path: the path to the namelist.wps file that will be used as template :param wrf_namelist_path: the path to the namelist.input file that will be used as template :param fire_namelist_path: the path to the namelist.fire file that will be used as template :param wps_geog_path: the path to the geogrid data directory providing terrain/fuel data :param email_notification: dictionary containing keys address and events indicating when a mail should be fired off """ # step 0 initialize the job state from the arguments js = JobState(args) jobdir = osp.abspath(osp.join(js.workspace_path, js.job_id)) make_clean_dir(jobdir) json.dump(job_args, open(osp.join(jobdir, 'input.json'), 'w'), indent=4, separators=(',', ': ')) jsub = make_job_file(js) json.dump(jsub, open(jsub.jobfile, 'w'), indent=4, separators=(',', ': ')) logging.info("job %s starting [%d hours to forecast]." % (js.job_id, js.fc_hrs)) sys.stdout.flush() send_email(js, 'start', 'Job %s started.' % js.job_id) # read in all namelists js.wps_nml = f90nml.read(js.args['wps_namelist_path']) js.wrf_nml = f90nml.read(js.args['wrf_namelist_path']) js.fire_nml = f90nml.read(js.args['fire_namelist_path']) js.ems_nml = None if 'emissions_namelist_path' in js.args: js.ems_nml = f90nml.read(js.args['emissions_namelist_path']) # Parse and setup the domain configuration js.domain_conf = WPSDomainConf(js.domains) num_doms = len(js.domain_conf) js.wps_nml['share']['start_date'] = [utc_to_esmf(js.start_utc)] * num_doms js.wps_nml['share']['end_date'] = [utc_to_esmf(js.end_utc)] * num_doms js.wps_nml['share']['interval_seconds'] = 3600 logging.info("number of domains defined is %d." 
% num_doms) # build directories in workspace js.wps_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wps')) js.wrf_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wrf')) #check_obj(args,'args') #check_obj(js,'Initial job state') # step 1: clone WPS and WRF directories logging.info("cloning WPS into %s" % js.wps_dir) cln = WRFCloner(js.args) cln.clone_wps(js.wps_dir, js.grib_source.vtables(), []) # step 2: process domain information and patch namelist for geogrid js.wps_nml['geogrid']['geog_data_path'] = js.args['wps_geog_path'] js.domain_conf.prepare_for_geogrid(js.wps_nml, js.wrf_nml, js.wrfxpy_dir, js.wps_dir) f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True) # do steps 2 & 3 & 4 in parallel (two execution streams) # -> GEOGRID -> # -> GRIB2 download -> UNGRIB -> proc_q = Queue() geogrid_proc = Process(target=run_geogrid, args=(js, proc_q)) grib_proc = Process(target=retrieve_gribs_and_run_ungrib, args=(js, proc_q)) logging.info('starting GEOGRID and GRIB2/UNGRIB') geogrid_proc.start() grib_proc.start() # wait until both tasks are done logging.info('waiting until both tasks are done') grib_proc.join() geogrid_proc.join() if proc_q.get() != 'SUCCESS': return if proc_q.get() != 'SUCCESS': return proc_q.close() # step 5: execute metgrid after ensuring all grids will be processed js.domain_conf.prepare_for_metgrid(js.wps_nml) f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True) logging.info("running METGRID") Metgrid(js.wps_dir).execute().check_output() send_email(js, 'metgrid', 'Job %s - metgrid complete.' % js.job_id) logging.info("cloning WRF into %s" % js.wrf_dir) # step 6: clone wrf directory, symlink all met_em* files, make namelists cln.clone_wrf(js.wrf_dir, []) symlink_matching_files(js.wrf_dir, js.wps_dir, "met_em*") time_ctrl = update_time_control(js.start_utc, js.end_utc, num_doms) js.wrf_nml['time_control'].update(time_ctrl) update_namelist(js.wrf_nml, js.grib_source.namelist_keys()) if 'ignitions' in js.args: update_namelist(js.wrf_nml, render_ignitions(js, num_doms)) # if we have an emissions namelist, automatically turn on the tracers if js.ems_nml is not None: logging.debug('namelist.fire_emissions given, turning on tracers') f90nml.write(js.ems_nml, osp.join(js.wrf_dir, 'namelist.fire_emissions'), force=True) js.wrf_nml['dynamics']['tracer_opt'] = [2] * num_doms f90nml.write(js.wrf_nml, osp.join(js.wrf_dir, 'namelist.input'), force=True) f90nml.write(js.fire_nml, osp.join(js.wrf_dir, 'namelist.fire'), force=True) # step 7: execute real.exe logging.info("running REAL") # try to run Real twice as it sometimes fails the first time # it's not clear why this error happens try: Real(js.wrf_dir).execute().check_output() except Exception as e: logging.error('Real step failed with exception %s, retrying ...' % str(e)) Real(js.wrf_dir).execute().check_output() # step 7b: if requested, do fuel moisture DA if js.fmda is not None: logging.info('running fuel moisture data assimilation') for dom in js.fmda.domains: assimilate_fm10_observations( osp.join(wrf_dir, 'wrfinput_d%02d' % dom), None, js.fmda.token) # step 8: execute wrf.exe on parallel backend logging.info('submitting WRF job') send_email(js, 'wrf_submit', 'Job %s - wrf job submitted.' % js.job_id) js.task_id = "sim-" + js.grid_code + "-" + utc_to_esmf(js.start_utc)[:10] jsub.job_num = WRF(js.wrf_dir, js.qsys).submit(js.task_id, js.num_nodes, js.ppn, js.wall_time_hrs) send_email( js, 'wrf_exec', 'Job %s - wrf job starting now with id %s.' 
% (js.job_id, js.task_id)) logging.info( "WRF job %s submitted with id %s, waiting for rsl.error.0000" % (jsub.job_num, js.task_id)) jobfile = osp.abspath(osp.join(js.workspace_path, js.job_id, 'job.json')) json.dump(jsub, open(jobfile, 'w'), indent=4, separators=(',', ': ')) process_output(js.job_id)
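# --- Illustrative sketch (not from the original source) ---
# execute() above runs GEOGRID and GRIB2/UNGRIB as two Process objects that each push
# 'SUCCESS' onto a shared status queue; the parent joins both and checks both status
# messages before moving on to METGRID.  The tiny stand-in below keeps only that control
# flow (the two run_* functions here are fakes):
from multiprocessing import Process, Queue


def run_geogrid(q: Queue) -> None:
    q.put('SUCCESS')        # real code would run geogrid.exe and report its outcome


def run_ungrib(q: Queue) -> None:
    q.put('SUCCESS')        # real code would fetch GRIB2 files and run ungrib.exe


if __name__ == '__main__':
    proc_q = Queue()
    procs = [Process(target=run_geogrid, args=(proc_q,)),
             Process(target=run_ungrib, args=(proc_q,))]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    statuses = [proc_q.get() for _ in procs]
    proc_q.close()
    # both tasks must have succeeded before the pipeline continues
    if any(s != 'SUCCESS' for s in statuses):
        raise SystemExit('one of the preprocessing steps failed')
    print('both preprocessing steps finished, continuing')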
def fit(self, num_accepted_steps: int, num_chains: int, burn_rate: float, down_sample_frequency: int, beta: float, cpu=None, initialise=True): # Check input parameters self.__check_input__() print("Performing MCMC Analysis") # Create results directory if not os.path.isdir(os.path.join(os.getcwd(), "mcmc_results")): os.mkdir(os.path.join(os.getcwd(), "mcmc_results")) if not os.path.isdir( os.path.join(os.getcwd(), "mcmc_results", "chain_results")): os.mkdir(os.path.join(os.getcwd(), "mcmc_results", "chain_results")) # Define simulation parameters for instance self.num_samples = num_accepted_steps self.num_chains = num_chains self.burn_in = burn_rate self.down_sample = down_sample_frequency # Selecting optimal beta (scale factor for MSE) if beta == -1: self.__autochoose_beta__() # Overdisperse chains if initialise == True: initial_parameters = self.__generate_parameters_from_priors__( self.num_chains) else: initial_parameters = [{ key: self.model.parameters[key] for key in self.parameters_to_fit } for _ in range(self.num_chains)] # Sample using MCMC print("Sampling from posterior distribution") # Set up simulation processes if self.num_chains >= cpu_count(): number_of_processes = cpu_count() - 1 else: number_of_processes = self.num_chains if cpu is not None: number_of_processes = cpu # Manual override of core number selection print("Using {} processes".format(number_of_processes)) with open(os.path.join('mcmc_results', 'progress.txt'), 'w') as f: # clear previous progress report f.write('') jobs = Queue() # put jobs on queue result = JoinableQueue() countQ = JoinableQueue() # Start up chains # Prepare instance for multiprocessing by pickling build_model = [ self.model.name, self.parameters_to_fit, self.priors, self.jump_distributions ] build_data = [self.data.data_set, self.data.name, self.data.conditions] # Run chains if number_of_processes == 1: jobs.put([initial_parameters[0]]) jobs.put(None) self.__run_chain__(0, jobs, result, countQ) else: # Put jobs in queue for m in range(self.num_chains): jobs.put([initial_parameters[m]]) # Add signals for each process that there are no more jobs for w in range(number_of_processes): jobs.put(None) [ Process(target=__unpackMCMC__, args=(i, jobs, result, countQ, build_model, build_data, self.model.parameters, self.temperature)).start() for i in range(number_of_processes) ] # Pull in the results from each thread pool_results = [] chain_attempts = [] for m in range(self.num_chains): print("Getting results") r = result.get() pool_results.append(r) result.task_done() a = countQ.get() chain_attempts.append(a) # close all extra threads jobs.close() result.join() result.close() countQ.close() # Perform data analysis # Record average acceptance across all chains self.average_acceptance = np.mean([el[1] for el in chain_attempts]) print("Average acceptance rate was {:.1f}%".format( self.average_acceptance)) # Consolidate results into attributes if self.num_chains != 1: for chain in pool_results: self.chain_lengths.append(len(chain)) self.parameter_history += chain[0] self.scale_factor_history += chain[1] self.score_history += chain[2] # Perform burn-in and down sampling for chain in pool_results: try: sample_pattern = range(int(burn_rate * len(chain)), len(chain), down_sample_frequency) self.thinned_parameter_samples += [ chain[0][i] for i in sample_pattern ] self.thinned_parameter_scale_factors += [ chain[1][i] for i in sample_pattern ] self.thinned_parameter_scores += [ chain[2][i] for i in sample_pattern ] except IndexError: pass # Write summary file with 
open(os.path.join("mcmc_results", "simulation_summary.txt"), 'w') as f: f.write('Temperature used was {}\n'.format(self.beta)) f.write('Number of chains = {}\n'.format(self.num_chains)) f.write("Average acceptance rate was {:.1f}%\n".format( self.average_acceptance)) f.write("Initial conditions were\n") for i in initial_parameters: f.write(str(i)) f.write("\n") f.write("Individual chain acceptance rates were:\n") for i in chain_attempts: f.write("Chain {}: {:.1f}%".format(i[0], i[1])) # Save object self.save(alt_filename=os.path.join("mcmc_results", "mcmc_fit.p")) pickle.dump( self.parameter_history, open(os.path.join('mcmc_results', 'mcmc_parameter_history.p'), 'wb'), 2) pickle.dump( self.scale_factor_history, open(os.path.join('mcmc_results', 'mcmc_scale_factor_history.p'), 'wb'), 2) pickle.dump( self.score_history, open(os.path.join('mcmc_results', 'mcmc_score_history.p'), 'wb'), 2)
class PmakeManager(Manager): """ Specialization of Manager for local multiprocessing, using an adhoc implementation of "pool" because of bugs of the Python 2.7 implementation of pool multiprocessing. """ queues = {} @contract(num_processes='int') def __init__(self, context, cq, num_processes, recurse=False, new_process=False, show_output=False): Manager.__init__(self, context=context, cq=cq, recurse=recurse) self.num_processes = num_processes self.last_accepted = 0 self.new_process = new_process self.show_output = show_output if new_process and show_output: msg = ('Compmake does not yet support echoing stdout/stderr ' 'when jobs are run in a new process.') warning(msg) self.cleaned = False def process_init(self): self.event_queue = Queue(1000) self.event_queue_name = str(id(self)) PmakeManager.queues[self.event_queue_name] = self.event_queue # info('Starting %d processes' % self.num_processes) self.subs = {} # name -> sub # available + processing + aborted = subs.keys self.sub_available = set() self.sub_processing = set() self.sub_aborted = set() db = self.context.get_compmake_db() storage = db.basepath # XXX: logs = os.path.join(storage, 'logs') #self.signal_queue = Queue() for i in range(self.num_processes): name = 'parmake_sub_%02d' % i write_log = os.path.join(logs, '%s.log' % name) make_sure_dir_exists(write_log) signal_token = name self.subs[name] = PmakeSub(name=name, signal_queue=None, signal_token=signal_token, write_log=write_log) self.job2subname = {} # all are available self.sub_available.update(self.subs) self.max_num_processing = self.num_processes # XXX: boiler plate def get_resources_status(self): resource_available = {} assert len(self.sub_processing) == len(self.processing) if not self.sub_available: msg = 'already %d processing' % len(self.sub_processing) if self.sub_aborted: msg += ' (%d workers aborted)' % len(self.sub_aborted) resource_available['nproc'] = (False, msg) # this is enough to continue return resource_available else: resource_available['nproc'] = (True, '') return resource_available @contract(reasons_why_not=dict) def can_accept_job(self, reasons_why_not): if len(self.sub_available) == 0 and len(self.sub_processing) == 0: # all have failed msg = 'All workers have aborted.' 
raise MakeHostFailed(msg) resources = self.get_resources_status() some_missing = False for k, v in resources.items(): if not v[0]: some_missing = True reasons_why_not[k] = v[1] if some_missing: return False return True def instance_job(self, job_id): publish(self.context, 'worker-status', job_id=job_id, status='apply_async') assert len(self.sub_available) > 0 name = sorted(self.sub_available)[0] self.sub_available.remove(name) assert not name in self.sub_processing self.sub_processing.add(name) sub = self.subs[name] self.job2subname[job_id] = name if self.new_process: f = parmake_job2_new_process args = (job_id, self.context) else: f = parmake_job2 args = (job_id, self.context, self.event_queue_name, self.show_output) async_result = sub.apply_async(f, args) return async_result def event_check(self): if not self.show_output: return while True: try: event = self.event_queue.get(block=False) # @UndefinedVariable event.kwargs['remote'] = True broadcast_event(self.context, event) except Empty: break def process_finished(self): if self.cleaned: return self.cleaned = True # print('process_finished()') self.event_queue.close() del PmakeManager.queues[self.event_queue_name] for name in self.sub_processing: self.subs[name].proc.terminate() for name in self.sub_available: self.subs[name].terminate() # XXX: in practice this never works well # if False: # XXX: ... so we just kill them mercilessly if True: # print('killing') for name in self.sub_processing: pid = self.subs[name].proc.pid os.kill(pid, signal.SIGKILL) # print('killed pid %s for %s' % (name, pid)) #print('process_finished() finished') if False: timeout = 100 for name in self.sub_available: print('joining %s' % name) self.subs[name].proc.join(timeout) killtree() # print('process_finished(): cleaned up') # Normal outcomes def job_failed(self, job_id, deleted_jobs): Manager.job_failed(self, job_id, deleted_jobs) self._clear(job_id) def job_succeeded(self, job_id): Manager.job_succeeded(self, job_id) self._clear(job_id) def _clear(self, job_id): assert job_id in self.job2subname name = self.job2subname[job_id] del self.job2subname[job_id] assert name in self.sub_processing assert name not in self.sub_available self.sub_processing.remove(name) self.sub_available.add(name) def host_failed(self, job_id): Manager.host_failed(self, job_id) assert job_id in self.job2subname name = self.job2subname[job_id] del self.job2subname[job_id] assert name in self.sub_processing assert name not in self.sub_available self.sub_processing.remove(name) # put in sub_aborted self.sub_aborted.add(name) def cleanup(self): self.process_finished()
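# --- Illustrative sketch (not from the original source) ---
# event_check() above drains the bounded event queue without blocking: it keeps calling
# get(block=False) and stops as soon as queue.Empty is raised, so the manager never
# stalls waiting for workers that have nothing to report.  Reduced to its core (the
# handle() callback and the demo events are invented):
import time
from multiprocessing import Queue
from queue import Empty


def drain_events(event_queue: Queue, handle) -> int:
    """Deliver every event currently buffered, then return how many were seen."""
    seen = 0
    while True:
        try:
            event = event_queue.get(block=False)
        except Empty:
            return seen
        handle(event)
        seen += 1


if __name__ == '__main__':
    q = Queue(1000)                 # bounded, like PmakeManager.event_queue
    for i in range(5):
        q.put({'job': i, 'status': 'done'})
    time.sleep(0.1)                 # same-process demo: let the feeder thread flush
    print(drain_events(q, handle=print), 'events handled')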
class DroneAI(): def __init__(self): print "starting drone..." self.drone = libardrone.ARDrone() self.K, self.d = parse_calib('calibration.txt') self.marker_frame = np.array( [[0, 0, 0], [MARKER_SIZE, 0, 0], [MARKER_SIZE, MARKER_SIZE, 0], [0, MARKER_SIZE, 0]], dtype=np.float32) self.ALLOW_FLIGHT = False self.record = Queue(1) self.frame_queue = Queue(2) self.frame_worker = Process(target=self.query_frames) self.frame_worker.start() def fly_test(self): print "taking off..." self.drone.takeoff() time.sleep(5) print "landing..." self.drone.land() time.sleep(5) def stop(self): print "Shutting down..." self.record.put(True) self.drone.land() time.sleep(2) self.drone.halt() print "Done" def query_frames(self): print "Starting frame worker" cap = cv2.VideoCapture("tcp://192.168.1.1:5555") while self.record.empty(): ret, frame = cap.read() if frame == None: time.sleep(0.1) continue if self.frame_queue.full(): self.frame_queue.get() self.frame_queue.put(frame) cap.release() self.frame_queue.close() return def get_vid_frame(self): return cv2.cvtColor(self.frame_queue.get(), cv2.COLOR_BGR2GRAY) def get_target_pose(self, frame): markers = detect_markers(frame, BASE_MARKER) if not markers: return None, None marker = markers[0] raw_points = [marker.contours[i][0] for i in xrange(4)] rot_points = [] for i in xrange(4): j = (i - marker.rotation) % 4 rot_points.append(raw_points[j]) rot_points = np.array(rot_points, dtype=np.float32) _, rvec, tvec = cv2.solvePnP(np.array([self.marker_frame]), np.array([rot_points]), self.K, self.d, flags=cv2.CV_ITERATIVE) return rvec, tvec def render_target_pose(self, frame, rvec, tvec): marker_pts = np.array([[0, 0, 0], [5, 0, 0], [0, 5, 0], [0, 0, 5]], dtype=np.float32) projected_pts, _ = cv2.projectPoints(marker_pts, rvec, tvec, self.K, self.d) cv2.line(frame, tuple(projected_pts[0][0]), tuple(projected_pts[1][0]), (0, 255, 0), 2) cv2.line(frame, tuple(projected_pts[0][0]), tuple(projected_pts[2][0]), (0, 0, 255), 2) cv2.line(frame, tuple(projected_pts[0][0]), tuple(projected_pts[3][0]), (255, 0, 0), 2) return frame def record_images(self, save_dir): if not os.path.exists(save_dir): os.makedirs(save_dir) buf = [] pygame.init() W, H = 640, 480 screen = pygame.display.set_mode((W, H)) clock = pygame.time.Clock() running = True recording = False while running: for event in pygame.event.get(): if event.type == pygame.KEYDOWN: if event.key == pygame.K_ESCAPE: self.stop() running = False recording = False elif event.key == pygame.K_RETURN: recording = True try: frame = self.get_vid_frame() frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) if recording: buf.append(frame.copy()) rvec, tvec = self.get_target_pose(frame) if rvec != None and tvec != None: frame = self.render_target_pose(frame, rvec, tvec) frame = np.rot90(frame) frame = pygame.surfarray.make_surface(frame) screen.blit(frame, (0, 0)) # battery status hud_color = (255, 0, 0) if drone.navdata.get( 'drone_state', dict()).get('emergency_mask', 1) else (10, 10, 255) bat = drone.navdata.get(0, dict()).get('battery', 0) f = pygame.font.Font(None, 20) hud = f.render('Battery: %i%%' % bat, True, hud_color) screen.blit(hud, (10, 10)) except: pass pygame.display.flip() clock.tick(50) pygame.display.set_caption("FPS: %.2f" % clock.get_fps()) print "Saving images to {0}".format(save_dir) for i in xrange(len(buf)): img = buf[i] path = os.path.join(save_dir, 'img_{0}.jpg'.format(i)) cv2.imwrite(path, img) def run(self): pygame.init() W, H = 640, 480 screen = pygame.display.set_mode((W, H)) clock = pygame.time.Clock() running = True 
while running: for event in pygame.event.get(): if event.type == pygame.KEYDOWN: if event.key == pygame.K_ESCAPE: self.stop() running = False elif event.key == pygame.K_RETURN: self.drone.takeoff() self.ALLOW_FLIGHT = True elif event.type == pygame.KEYUP: print "Taking off..." self.drone.hover() try: frame = self.get_vid_frame() frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) rvec, tvec = self.get_target_pose(frame) #print rvec, tvec #rvec = None #tvec = None if rvec != None and tvec != None: print self.ALLOW_FLIGHT if self.ALLOW_FLIGHT: theta = np.arctan2(tvec[0], tvec[2]) x, y, z, r = 0, 0, 0, 0 if tvec[0] > np.pi / 6: r = 0.2 elif tvec[0] < -np.pi / 6: r = -0.2 #if tvec[2] > 50: # z = -0.1 #elif tvec[0] < 45: # z = 0.1 print 'sending move command {0} {1} {2} {3}'.format( x, y, z, r) self.drone.move_x(x, y, z, r) print 'rendering pose' frame = self.render_target_pose(frame, rvec, tvec) print 'sleeping' time.sleep(0.5) else: self.drone.hover() frame = np.rot90(frame) frame = pygame.surfarray.make_surface(frame) screen.blit(frame, (0, 0)) # battery status hud_color = (255, 0, 0) if self.drone.navdata.get( 'drone_state', dict()).get('emergency_mask', 1) else (10, 10, 255) bat = self.drone.navdata.get(0, dict()).get('battery', 0) f = pygame.font.Font(None, 20) hud = f.render('Battery: %i%%' % bat, True, hud_color) screen.blit(hud, (10, 10)) except Exception as e: print e pass pygame.display.flip() clock.tick(50) pygame.display.set_caption("FPS: %.2f" % clock.get_fps()) pygame.quit()
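# --- Illustrative sketch (not from the original source) ---
# query_frames() above keeps the video pipeline fresh with a Queue(2): when the queue is
# full it throws the oldest frame away before putting the new one, so get_vid_frame()
# always sees something recent instead of a growing backlog.  The same trick in
# isolation (integers stand in for camera frames):
from multiprocessing import Process, Queue
from queue import Empty


def producer(frames: Queue) -> None:
    for frame in range(100):                 # pretend each number is a captured frame
        if frames.full():
            try:
                frames.get_nowait()          # drop the oldest frame
            except Empty:
                pass                         # the consumer may have emptied it already
        frames.put(frame)
    frames.put(None)                         # tell the consumer we are done


if __name__ == '__main__':
    frames = Queue(2)                        # small buffer: latest frames only
    p = Process(target=producer, args=(frames,))
    p.start()
    last = None
    while True:
        item = frames.get()
        if item is None:
            break
        last = item
    p.join()
    print('last frame seen:', last)          # 99; earlier frames may have been dropped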
logger = logging.getLogger("Tester")

# Threading Locks, Events
stop = Event()
lock = Lock()
stop.clear()

# Setting up queue
queue = Queue(maxsize=32)

print("Setting up Process")
process = Process(target=loop, args=(queue, stop, lock, logger))
process.daemon = True
process.start()

print("Provide data to Process queue")
data = [0, 1, 2, 3, 4, 5, 6, 7]
if not queue.full():
    queue.put(data, block=False)

# Finish
print("Cleaning up")
stop.set()
process.join()
process.close()
queue.close()
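# --- Illustrative sketch (not from the original source) ---
# The script above hands its queue, stop event, lock and logger to a worker called
# loop(), which is not shown.  One plausible shape for it, consistent with how those
# objects are used (daemon process, stop.set() before join, data arriving in batches),
# would be:
from queue import Empty


def loop(queue, stop, lock, logger):
    while not stop.is_set():
        try:
            batch = queue.get(timeout=0.5)    # wake up regularly to re-check stop
        except Empty:
            continue
        with lock:                            # serialise whatever the batch touches
            logger.info("processing batch of %d items", len(batch))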
def worker( parent_conn: Connection, step_queue: Queue, pickled_env_factory: str, worker_id: int, engine_configuration: EngineConfig, log_level: int = logging_util.INFO, ) -> None: env_factory: Callable[[int, List[SideChannel]], UnityEnvironment] = cloudpickle.loads( pickled_env_factory) env_parameters = EnvironmentParametersChannel() engine_configuration_channel = EngineConfigurationChannel() engine_configuration_channel.set_configuration(engine_configuration) stats_channel = StatsSideChannel() env: BaseEnv = None # Set log level. On some platforms, the logger isn't common with the # main process, so we need to set it again. logging_util.set_log_level(log_level) def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None: parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload)) def _generate_all_results() -> AllStepResult: all_step_result: AllStepResult = {} for brain_name in env.behavior_specs: all_step_result[brain_name] = env.get_steps(brain_name) return all_step_result try: env = env_factory( worker_id, [env_parameters, engine_configuration_channel, stats_channel]) while True: req: EnvironmentRequest = parent_conn.recv() if req.cmd == EnvironmentCommand.STEP: all_action_info = req.payload for brain_name, action_info in all_action_info.items(): if len(action_info.action) != 0: env.set_actions(brain_name, action_info.action) env.step() all_step_result = _generate_all_results() # The timers in this process are independent from all the processes and the "main" process # So after we send back the root timer, we can safely clear them. # Note that we could randomly return timers a fraction of the time if we wanted to reduce # the data transferred. # TODO get gauges from the workers and merge them in the main process too. env_stats = stats_channel.get_and_reset_stats() step_response = StepResponse(all_step_result, get_timer_root(), env_stats) step_queue.put( EnvironmentResponse(EnvironmentCommand.STEP, worker_id, step_response)) reset_timers() elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS: _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs) elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS: for k, v in req.payload.items(): if isinstance(v, ParameterRandomizationSettings): v.apply(k, env_parameters) elif req.cmd == EnvironmentCommand.RESET: env.reset() all_step_result = _generate_all_results() _send_response(EnvironmentCommand.RESET, all_step_result) elif req.cmd == EnvironmentCommand.CLOSE: break except ( KeyboardInterrupt, UnityCommunicationException, UnityTimeOutException, UnityEnvironmentException, UnityCommunicatorStoppedException, ) as ex: logger.info( f"UnityEnvironment worker {worker_id}: environment stopping.") step_queue.put( EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)) _send_response(EnvironmentCommand.ENV_EXITED, ex) except Exception as ex: logger.error( f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception." ) step_queue.put( EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)) _send_response(EnvironmentCommand.ENV_EXITED, ex) finally: logger.debug(f"UnityEnvironment worker {worker_id} closing.") if env is not None: env.close() logger.debug(f"UnityEnvironment worker {worker_id} done.") parent_conn.close() step_queue.put( EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None)) step_queue.close()
class TestQueueFile(TFCTestCase): def setUp(self): self.f_queue = Queue() def tearDown(self): while not self.f_queue.empty(): self.f_queue.get() time.sleep(0.1) self.f_queue.close() with ignored(OSError): os.remove('testfile.txt') def test_aborted_file(self): # Setup input_data = os.urandom(5) with open('testfile.txt', 'wb+') as f: f.write(input_data) window = TxWindow(name='Alice', type=WIN_TYPE_CONTACT, type_print='contact', uid='*****@*****.**') settings = Settings(session_traffic_masking=True, disable_gui_dialog=True) gateway = Gateway(txm_inter_packet_delay=0.02) input_list = ['./testfile.txt', 'No'] gen = iter(input_list) builtins.input = lambda _: str(next(gen)) # Test self.assertFR("File selection aborted.", queue_file, window, settings, self.f_queue, gateway) def test_file_queue_short_traffic_masking(self): # Setup input_data = os.urandom(5) with open('testfile.txt', 'wb+') as f: f.write(input_data) window = TxWindow(name='Alice', type=WIN_TYPE_CONTACT, type_print='contact', uid='*****@*****.**', log_messages=True) settings = Settings(session_traffic_masking=True, disable_gui_dialog=True) gateway = Gateway(txm_inter_packet_delay=0.02) input_list = ['./testfile.txt', 'Yes'] gen = iter(input_list) builtins.input = lambda _: str(next(gen)) # Test self.assertIsNone(queue_file(window, settings, self.f_queue, gateway)) time.sleep(0.1) self.assertEqual(self.f_queue.qsize(), 1) q_data, log_messages, log_as_ph = self.f_queue.get() self.assertIsInstance(q_data, bytes) self.assertTrue(log_messages) self.assertTrue(log_as_ph) def test_file_queue_long_normal(self): # Setup input_data = os.urandom(2000) with open('testfile.txt', 'wb+') as f: f.write(input_data) window = TxWindow(name='Alice', type=WIN_TYPE_CONTACT, type_print='contact', uid='*****@*****.**', window_contacts=[create_contact()], log_messages=True) settings = Settings(session_traffic_masking=False, disable_gui_dialog=True, confirm_sent_files=True, multi_packet_random_delay=True) gateway = Gateway(txm_inter_packet_delay=0.02) input_list = ['./testfile.txt', 'Yes'] gen = iter(input_list) builtins.input = lambda _: str(next(gen)) # Test self.assertIsNone(queue_file(window, settings, self.f_queue, gateway)) time.sleep(0.1) self.assertEqual(self.f_queue.qsize(), 11) packet, settings, rx_account, tx_account, log_messages, log_as_ph, win_uid = self.f_queue.get( ) self.assertIsInstance(packet, bytes) self.assertIsInstance(settings, Settings) self.assertEqual(rx_account, '*****@*****.**') self.assertEqual(tx_account, '*****@*****.**') self.assertEqual(win_uid, '*****@*****.**') self.assertTrue(log_messages) self.assertTrue(log_as_ph)
from multiprocessing import Process, Queue

# Value that tells the consumer there is no more data to process
sentinel = -1


def creator(data, q):
    """Creates data to be consumed and waits for the consumer to finish processing."""
    print('Creating data and putting it on the queue')
    for item in data:
        q.put(item)


def my_consumer(q):
    """Consumes items from the queue until it sees the sentinel value."""
    while True:
        data = q.get()
        print('data found to be processed: {}'.format(data))
        processed = data * 2
        print(processed)
        if data == sentinel:
            break


if __name__ == '__main__':
    q = Queue()
    data = [5, 10, 13, -1]
    process_one = Process(target=creator, args=(data, q))
    process_two = Process(target=my_consumer, args=(q,))
    process_one.start()
    process_two.start()
    q.close()
    q.join_thread()
    process_one.join()
    process_two.join()
for p in procs: p.start() while(len(procs)): time.sleep(10) procs = list(filter(lambda x: x.is_alive(), procs)) print(f"[INFO] Remaining processes: {procs}") # Get sets from child results = parent_queue.get() authors = parent_queue.get() next_authors = parent_queue.get() # Close previous queue and create new one parent_queue.close() start_wait = time.perf_counter() sp.join() print(f"[INFO] Waited {time.perf_counter()-start_wait:.2f} seconds for DB to digest") parent_queue = Queue() sp = Process(target=start_sp, args=(queue, parent_queue,), daemon=True, name=f"spider") procs.append(sp) sp.start() # Put sets back into child parent_queue.put(results) parent_queue.put(authors) parent_queue.put(next_authors) print("[INFO] Exiting")
class TestSendPacket(unittest.TestCase): """\ This function is by far the most critical to security in TxM, as it must detect output of key material. Plaintext length must always be evaluated to ensure constant ciphertext length and hiding of output data type. The most likely place for error is going to be the tx_harac attribute of keyset, as it's the only data loaded from the sensitive key database that is sent to contact. Alternative place could be a bug in implementation where account strings would incorrectly contain a byte string that contained key material. """ def setUp(self): self.l_queue = Queue() self.key_list = KeyList(nicks=['Alice']) self.settings = Settings() self.gateway = Gateway() def tearDown(self): while not self.l_queue.empty(): self.l_queue.get() time.sleep(0.1) self.l_queue.close() def test_message_length(self): # Check that only 256-byte plaintext messages are ever allowed for l in range(1, 256): with self.assertRaises(SystemExit): send_packet(self.key_list, self.gateway, self.l_queue, bytes(l), self.settings, '*****@*****.**', '*****@*****.**', True) for l in range(257, 300): with self.assertRaises(SystemExit): send_packet(self.key_list, self.gateway, self.l_queue, bytes(l), self.settings, '*****@*****.**', '*****@*****.**', True) def test_invalid_harac_raises_raises_struct_error(self): # Check that in case where internal error caused bytestring (possible key material) # to end up in hash ratchet value, system raises some error that prevents output of packet. # In this case the error comes from unsuccessful encoding of hash ratchet counter. for l in range(1, 33): key_list = KeyList() key_list.keysets = [ create_keyset(tx_key=KEY_LENGTH * b'\x02', tx_harac=l * b'k') ] with self.assertRaises(struct.error): send_packet(key_list, self.gateway, self.l_queue, bytes(ASSEMBLY_PACKET_LEN), self.settings, '*****@*****.**', '*****@*****.**', True) def test_invalid_account_raises_stop_iteration(self): # Check that in case where internal error caused bytestring (possible key material) # to end up in account strings, System raises some error that prevents output of packet. # In this case the error comes from unsuccessful encoding of string (AttributeError) # or KeyList lookup error when bytes are used (StopIteration). These errors are not catched. with self.assertRaises(StopIteration): send_packet(self.key_list, self.gateway, self.l_queue, bytes(ASSEMBLY_PACKET_LEN), self.settings, b'*****@*****.**', '*****@*****.**', True) with self.assertRaises(AttributeError): send_packet(self.key_list, self.gateway, self.l_queue, bytes(ASSEMBLY_PACKET_LEN), self.settings, '*****@*****.**', b'*****@*****.**', True) def test_valid_message_packet(self): # Setup settings = Settings(multi_packet_random_delay=True) gateway = Gateway() key_list = KeyList(master_key=bytes(KEY_LENGTH)) key_list.keysets = [ create_keyset(tx_key=KEY_LENGTH * b'\x02', tx_harac=8) ] # Test self.assertIsNone( send_packet(key_list, gateway, self.l_queue, bytes(ASSEMBLY_PACKET_LEN), settings, '*****@*****.**', '*****@*****.**', True)) self.assertEqual(len(gateway.packets), 1) self.assertEqual(len(gateway.packets[0]), 396) time.sleep(0.1) self.assertFalse(self.l_queue.empty()) def test_valid_command_packet(self): """Test that commands are output as they should. Since command packets have no trailer, and since only user's RxM has local decryption key, encryption with any key recipient is not already in possession of does not compromise plaintext. 
""" # Setup key_list = KeyList(master_key=bytes(KEY_LENGTH)) key_list.keysets = [create_keyset(LOCAL_ID)] # Test self.assertIsNone( send_packet(key_list, self.gateway, self.l_queue, bytes(ASSEMBLY_PACKET_LEN), self.settings)) time.sleep(0.1) self.assertEqual(len(self.gateway.packets), 1) self.assertEqual(len(self.gateway.packets[0]), 365) self.assertEqual(self.l_queue.qsize(), 1)
def main(course_file='courses.txt', clear_db=True): """Main method/entrypoint """ # Courses work_queue = JoinableQueue() skipped_queue = Queue(0) with open(course_file, "r") as f: for line in f: work_queue.put(line.strip()) # For holding the database info db_queue = Queue() db_lock = Lock() # Create the threads process_list = [] for i in range(args.Workers): p = multiprocessing.Process(target=process_data, args=(work_queue, skipped_queue, db_queue, db_lock)) process_list.append(p) p.start() work_queue.join() work_queue.close() db_lock.acquire() try: print('Done work. Got {0} courses, skipped {1}'.format( db_queue.qsize(), skipped_queue.qsize())) # qsize is broken on macOS except: print('\nDone work, writing course database to files') db_lock.release() print() # Announce skipped courses with open('skippedCourses.txt', 'w') as f: if not skipped_queue.empty(): print('These courses were skipped: ') while not skipped_queue.empty(): skipped_course = skipped_queue.get() print(' {0}'.format(skipped_course)) to_file = skipped_course.split(',', 1)[0] f.write(u'{0}\n'.format(to_file).encode('utf8')) print() courses_list = [] sections_list = [] activities_list = [] while not db_queue.empty(): course = db_queue.get() # course name courses_list.append(course[0]) # sections for section in course[1]: sections_list.append(section) # activities for activity in course[2]: activities_list.append(activity) db_queue.close() db_queue.join_thread() # Remove any duplicates courses_list = list(set(courses_list)) sections_list = list(set(sections_list)) activities_list = list(set(activities_list)) # Change hh:20 to hh:30, hh:50 to (hh+1):00 # (Somewhat misleading compared to the real schedule times, # but gives correctly-sized blocks in all clients without # requiring any clients to update) activities_list = [a.replace(':20,', ':30,') for a in activities_list] activities_list = [a.replace('9:50,', '10:00,') for a in activities_list] activities_list = [a.replace('12:50,', '13:00,') for a in activities_list] activities_list = [a.replace('15:50,', '16:00,') for a in activities_list] activities_list = [a.replace('18:50,', '19:00,') for a in activities_list] activities_list = [a.replace('21:50,', '22:00,') for a in activities_list] # Print total count of all items print('Courses: {0}'.format(len(courses_list))) print('Sections: {0}'.format(len(sections_list))) print('Activities: {0}'.format(len(activities_list))) # Write courses to files with open('db_courses.csv', 'w' if clear_db else 'a') as f: for course in courses_list: writeline = u'{0}\n'.format(course).encode('utf8') f.write(writeline) # Write sections to files with open('db_sections.csv', 'w' if clear_db else 'a') as f: for section in sections_list: writeline = u'{0}\n'.format(section).encode('utf8') f.write(writeline) # Write activities to files with open('db_activities.csv', 'w' if clear_db else 'a') as f: for activity in activities_list: writeline = u'{0}\n'.format(activity).encode('utf8') f.write(writeline)
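# --- Illustrative sketch (not from the original source) ---
# main() above leans on qsize()/empty(), which its own comment notes are unreliable:
# qsize() raises NotImplementedError on macOS, and both are only approximations while
# producers are still running.  Once the workers have joined, a more portable way to
# empty a Queue is to drain it with get_nowait() until queue.Empty, as in this helper
# (drain() and the usage names are hypothetical):
from multiprocessing import Queue
from queue import Empty


def drain(q: Queue) -> list:
    """Return every item currently buffered in q (call only after producers are done)."""
    items = []
    while True:
        try:
            items.append(q.get_nowait())
        except Empty:
            return items

# usage, assuming queues like db_queue and skipped_queue from main() above:
#   courses = drain(db_queue)
#   skipped = drain(skipped_queue)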
class TestQueuePackets(unittest.TestCase): def setUp(self): self.settings = Settings() self.queue = Queue() self.window = TxWindow(uid='*****@*****.**', log_messages=True) self.window.window_contacts = [create_contact()] def tearDown(self): while not self.queue.empty(): self.queue.get() time.sleep(0.1) self.queue.close() def test_queue_message_traffic_masking(self): # Setup packet_list = split_to_assembly_packets(os.urandom(200), MESSAGE) self.settings.session_traffic_masking = True # Test self.assertIsNone( queue_packets(packet_list, MESSAGE, self.settings, self.queue, self.window)) time.sleep(0.1) self.assertEqual(self.queue.qsize(), 1) packet, log_messages, log_as_ph = self.queue.get() self.assertIsInstance(packet, bytes) self.assertTrue(log_messages) self.assertFalse(log_as_ph) def test_queue_message_normal(self): # Setup packet_list = split_to_assembly_packets(os.urandom(200), MESSAGE) # Test self.assertIsNone( queue_packets(packet_list, MESSAGE, self.settings, self.queue, self.window)) time.sleep(0.1) self.assertEqual(self.queue.qsize(), 1) packet, settings, rx_account, tx_account, log_setting, log_as_ph, win_uid = self.queue.get( ) self.assertIsInstance(packet, bytes) self.assertIsInstance(settings, Settings) self.assertEqual(rx_account, '*****@*****.**') self.assertEqual(tx_account, '*****@*****.**') self.assertEqual(win_uid, '*****@*****.**') self.assertTrue(log_setting) self.assertFalse(log_as_ph) def test_queue_file_traffic_masking(self): # Setup packet_list = split_to_assembly_packets(os.urandom(200), FILE) self.settings.session_traffic_masking = True # Test self.assertIsNone( queue_packets(packet_list, FILE, self.settings, self.queue, self.window)) time.sleep(0.1) self.assertEqual(self.queue.qsize(), 1) packet, log_messages, log_as_ph = self.queue.get() self.assertIsInstance(packet, bytes) self.assertTrue(log_messages) self.assertFalse(log_as_ph) def test_queue_file_normal(self): # Setup packet_list = split_to_assembly_packets(os.urandom(200), FILE) # Test self.assertIsNone( queue_packets(packet_list, FILE, self.settings, self.queue, self.window, log_as_ph=True)) time.sleep(0.1) self.assertEqual(self.queue.qsize(), 1) packet, settings, rx_account, tx_account, log_setting, log_as_ph, window_uid = self.queue.get( ) self.assertIsInstance(packet, bytes) self.assertIsInstance(settings, Settings) self.assertEqual(rx_account, '*****@*****.**') self.assertEqual(tx_account, '*****@*****.**') self.assertEqual(window_uid, '*****@*****.**') self.assertTrue(log_setting) self.assertTrue(log_as_ph) def test_queue_command_traffic_masking(self): # Setup packet_list = split_to_assembly_packets(os.urandom(200), COMMAND) self.settings.session_traffic_masking = True # Test self.assertIsNone( queue_packets(packet_list, COMMAND, self.settings, self.queue, self.window)) time.sleep(0.1) self.assertEqual(self.queue.qsize(), 1) data, log_messages = self.queue.get() self.assertIsInstance(data, bytes) self.assertTrue(log_messages) def test_queue_command_normal(self): # Setup packet_list = split_to_assembly_packets(os.urandom(200), COMMAND) # Test self.assertIsNone( queue_packets(packet_list, COMMAND, self.settings, self.queue, self.window)) time.sleep(0.1) self.assertEqual(self.queue.qsize(), 1) packet, settings = self.queue.get() self.assertIsInstance(packet, bytes) self.assertIsInstance(settings, Settings)
class QueuedWriter:
    """A queued multiprocess writer."""

    def __init__(self, func: Callable):
        """Fill a queue and call func with data as it is able to use it."""
        self._func = func

        # Queue to fill with data to be written.
        self._data_queue = Queue()

        # Queue to get the return value from the write function.
        self._return_queue = Queue()

        # Create the writer process.
        self._writer_p = Process(target=_queue_writer,
                                 args=(func, self._data_queue, self._return_queue))
        self._writer_p.start()

    def __repr__(self):
        """Return a python expression to recreate this instance."""
        return f"{self.__class__.__name__}(func={self._func})"

    def write(self, data: str | bytes):
        """Send more data down the queue to the processing function."""
        self._data_queue.put(data)
        return len(data)

    __call__ = write

    def close(self):
        """Close the queue and writer process."""
        # Send EOF to end the writer process.
        self._data_queue.put('EOF')

        # Close the queue.
        self._data_queue.close()

        # Wait for the queue buffer to empty.
        self._data_queue.join_thread()

        # Get the return value from the writer process.
        ret_val = self._return_queue.get()

        # Close the return queue.
        self._return_queue.close()

        # Wait for the writer process to exit.
        self._writer_p.join()

        # Return the writer result.
        return ret_val

    def __enter__(self) -> object:
        """Provide the ability to use Python's with statement."""
        try:
            return self
        except Exception as err:
            print(err)
            return None

    def __exit__(self, exc_type, exc_value, traceback) -> bool:
        """Close the writer when finished."""
        try:
            return bool(self.close()) or not bool(exc_type)
        except Exception as err:
            print(err)
            return False
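# QueuedWriter above hands the real work to a _queue_writer() process target that is
# not shown in this file. The following is a minimal sketch of a compatible
# implementation, assuming the plain 'EOF' string used by close() is the end-of-stream
# sentinel and that only the last return value of func needs to be reported back:
def _queue_writer(func, data_queue, return_queue):
    """Consume items from data_queue and pass them to func until 'EOF' arrives."""
    ret_val = None
    while True:
        data = data_queue.get()
        if data == 'EOF':        # sentinel put by QueuedWriter.close()
            break
        ret_val = func(data)     # keep the most recent return value
    return_queue.put(ret_val)    # QueuedWriter.close() blocks on this value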
class AnalysisScheduler: ''' This Scheduler performs analysis of firmware objects ''' def __init__(self, config: Optional[ConfigParser] = None, pre_analysis=None, post_analysis=None, db_interface=None): self.config = config self.analysis_plugins = {} self.load_plugins() self.stop_condition = Value('i', 0) self.process_queue = Queue() self.tag_queue = Queue() self.db_backend_service = db_interface if db_interface else BackEndDbInterface(config=config) self.pre_analysis = pre_analysis if pre_analysis else self.db_backend_service.add_object self.post_analysis = post_analysis if post_analysis else self.db_backend_service.add_analysis self.start_scheduling_process() self.start_result_collector() logging.info('Analysis System online...') logging.info('Plugins available: {}'.format(self.get_list_of_available_plugins())) def shutdown(self): ''' shutdown the scheduler and all loaded plugins ''' logging.debug('Shutting down...') self.stop_condition.value = 1 with ThreadPoolExecutor() as e: e.submit(self.schedule_process.join) e.submit(self.result_collector_process.join) for plugin in self.analysis_plugins: e.submit(self.analysis_plugins[plugin].shutdown) if getattr(self.db_backend_service, 'shutdown', False): self.db_backend_service.shutdown() self.tag_queue.close() self.process_queue.close() logging.info('Analysis System offline') def add_update_task(self, fo: FileObject): for included_file in self.db_backend_service.get_list_of_all_included_files(fo): child = self.db_backend_service.get_object(included_file) child.scheduled_analysis = self._add_dependencies_recursively(fo.scheduled_analysis or []) child.scheduled_analysis = self._smart_shuffle(child.scheduled_analysis) self.check_further_process_or_complete(child) self.check_further_process_or_complete(fo) def add_task(self, fo: FileObject): ''' This function should be used to add a new firmware object to the scheduler ''' scheduled_plugins = self._add_dependencies_recursively(fo.scheduled_analysis or []) fo.scheduled_analysis = self._smart_shuffle(scheduled_plugins + MANDATORY_PLUGINS) self.check_further_process_or_complete(fo) def _smart_shuffle(self, plugin_list: List[str]) -> List[str]: scheduled_plugins = [] remaining_plugins = set(plugin_list) while len(remaining_plugins) > 0: next_plugins = self._get_plugins_with_met_dependencies(remaining_plugins, scheduled_plugins) if not next_plugins: logging.error('Error: Could not schedule plugins because dependencies cannot be fulfilled: {}'.format(remaining_plugins)) break scheduled_plugins[:0] = shuffled(next_plugins) remaining_plugins.difference_update(next_plugins) # assure file type is first for blacklist functionality if 'file_type' in scheduled_plugins and scheduled_plugins[-1] != 'file_type': scheduled_plugins.remove('file_type') scheduled_plugins.append('file_type') return scheduled_plugins def _get_plugins_with_met_dependencies(self, remaining_plugins: Set[str], scheduled_plugins: List[str]) -> List[str]: met_dependencies = scheduled_plugins return [ plugin for plugin in remaining_plugins if all(dependency in met_dependencies for dependency in self.analysis_plugins[plugin].DEPENDENCIES) ] def get_list_of_available_plugins(self): ''' returns a list of all loaded plugins ''' plugin_list = list(self.analysis_plugins.keys()) plugin_list.sort(key=str.lower) return plugin_list # ---- internal functions ---- def get_default_plugins_from_config(self): try: result = {} for plugin_set in self.config['default_plugins']: result[plugin_set] = read_list_from_config(self.config, 'default_plugins', 
plugin_set) return result except (TypeError, KeyError, AttributeError): logging.warning('default plug-ins not set in config') return [] def get_plugin_dict(self): ''' returns a dictionary of plugins with the following form: names as keys and the respective description value {NAME: (DESCRIPTION, MANDATORY_FLAG, DEFAULT_FLAG, VERSION)} - mandatory plug-ins shall not be shown in the analysis selection but always exectued - default plug-ins shall be pre-selected in the analysis selection ''' plugin_list = self.get_list_of_available_plugins() plugin_list = self._remove_unwanted_plugins(plugin_list) default_plugins = self.get_default_plugins_from_config() default_flag_dict = {} result = {} for plugin in plugin_list: mandatory_flag = plugin in MANDATORY_PLUGINS for key in default_plugins.keys(): default_flag_dict[key] = plugin in default_plugins[key] result[plugin] = (self.analysis_plugins[plugin].DESCRIPTION, mandatory_flag, dict(default_flag_dict), self.analysis_plugins[plugin].VERSION) result['unpacker'] = ('Additional information provided by the unpacker', True, False) return result # ---- scheduling functions ---- def get_scheduled_workload(self): workload = {'analysis_main_scheduler': self.process_queue.qsize()} for plugin in self.analysis_plugins: workload[plugin] = self.analysis_plugins[plugin].in_queue.qsize() return workload def register_plugin(self, name, plugin_instance): ''' This function is called upon plugin init to announce its presence ''' self.analysis_plugins[name] = plugin_instance def load_plugins(self): source = import_plugins('analysis.plugins', 'plugins/analysis') for plugin_name in source.list_plugins(): plugin = source.load_plugin(plugin_name) plugin.AnalysisPlugin(self, config=self.config) def start_scheduling_process(self): logging.debug('Starting scheduler...') self.schedule_process = ExceptionSafeProcess(target=self.scheduler) self.schedule_process.start() def scheduler(self): while self.stop_condition.value == 0: try: task = self.process_queue.get(timeout=int(self.config['ExpertSettings']['block_delay'])) except Empty: pass else: self.process_next_analysis(task) # ---- analysis skipping ---- def process_next_analysis(self, fw_object: FileObject): self.pre_analysis(fw_object) analysis_to_do = fw_object.scheduled_analysis.pop() if analysis_to_do not in self.analysis_plugins: logging.error('Plugin \'{}\' not available'.format(analysis_to_do)) self.check_further_process_or_complete(fw_object) else: self._start_or_skip_analysis(analysis_to_do, fw_object) def _start_or_skip_analysis(self, analysis_to_do: str, fw_object: FileObject): if self._analysis_is_already_in_db_and_up_to_date(analysis_to_do, fw_object.get_uid()): logging.debug('skipping analysis "{}" for {} (analysis already in DB)'.format(analysis_to_do, fw_object.get_uid())) if analysis_to_do in self._get_cumulative_remaining_dependencies(fw_object.scheduled_analysis): self._add_completed_analysis_results_to_file_object(analysis_to_do, fw_object) self.check_further_process_or_complete(fw_object) elif analysis_to_do not in MANDATORY_PLUGINS and self._next_analysis_is_blacklisted(analysis_to_do, fw_object): logging.debug('skipping analysis "{}" for {} (blacklisted file type)'.format(analysis_to_do, fw_object.get_uid())) fw_object.processed_analysis[analysis_to_do] = self._get_skipped_analysis_result(analysis_to_do) self.check_further_process_or_complete(fw_object) else: self.analysis_plugins[analysis_to_do].add_job(fw_object) def _add_completed_analysis_results_to_file_object(self, analysis_to_do: str, fw_object: 
FileObject): db_entry = self.db_backend_service.get_specific_fields_of_db_entry( fw_object.get_uid(), {'processed_analysis.{}'.format(analysis_to_do): 1} ) desanitized_analysis = self.db_backend_service.retrieve_analysis(db_entry['processed_analysis']) fw_object.processed_analysis[analysis_to_do] = desanitized_analysis[analysis_to_do] def _analysis_is_already_in_db_and_up_to_date(self, analysis_to_do: str, uid: str): db_entry = self.db_backend_service.get_specific_fields_of_db_entry( uid, { 'processed_analysis.{}.file_system_flag'.format(analysis_to_do): 1, 'processed_analysis.{}.plugin_version'.format(analysis_to_do): 1, 'processed_analysis.{}.system_version'.format(analysis_to_do): 1 } ) if not db_entry or analysis_to_do not in db_entry['processed_analysis']: return False elif 'plugin_version' not in db_entry['processed_analysis'][analysis_to_do]: logging.error('Plugin Version missing: UID: {}, Plugin: {}'.format(uid, analysis_to_do)) return False if db_entry['processed_analysis'][analysis_to_do]['file_system_flag']: db_entry['processed_analysis'] = self.db_backend_service.retrieve_analysis(db_entry['processed_analysis'], analysis_filter=[analysis_to_do, ]) if 'file_system_flag' in db_entry['processed_analysis'][analysis_to_do]: logging.warning('Desanitization of version string failed') return False analysis_plugin_version = db_entry['processed_analysis'][analysis_to_do]['plugin_version'] analysis_system_version = db_entry['processed_analysis'][analysis_to_do]['system_version'] \ if 'system_version' in db_entry['processed_analysis'][analysis_to_do] else None plugin_version = self.analysis_plugins[analysis_to_do].VERSION system_version = self.analysis_plugins[analysis_to_do].SYSTEM_VERSION \ if hasattr(self.analysis_plugins[analysis_to_do], 'SYSTEM_VERSION') else None if LooseVersion(analysis_plugin_version) < LooseVersion(plugin_version) or \ LooseVersion(analysis_system_version or '0') < LooseVersion(system_version or '0'): return False return True # ---- blacklist and whitelist ---- def _get_skipped_analysis_result(self, analysis_to_do): return { 'skipped': 'blacklisted file type', 'summary': [], 'analysis_date': time(), 'plugin_version': self.analysis_plugins[analysis_to_do].VERSION } def _next_analysis_is_blacklisted(self, next_analysis: str, fw_object: FileObject): blacklist, whitelist = self._get_blacklist_and_whitelist(next_analysis) if not (blacklist or whitelist): return False if blacklist and whitelist: logging.error('{}Configuration of plugin "{}" erroneous{}: found blacklist and whitelist. 
Ignoring blacklist.'.format( bcolors.FAIL, next_analysis, bcolors.ENDC)) file_type = self._get_file_type_from_object_or_db(fw_object) if whitelist: return not substring_is_in_list(file_type, whitelist) return substring_is_in_list(file_type, blacklist) def _get_file_type_from_object_or_db(self, fw_object: FileObject) -> Optional[str]: if 'file_type' not in fw_object.processed_analysis: self._add_completed_analysis_results_to_file_object('file_type', fw_object) return fw_object.processed_analysis['file_type']['mime'].lower() def _get_blacklist_and_whitelist(self, next_analysis: str) -> Tuple[List, List]: blacklist, whitelist = self._get_blacklist_and_whitelist_from_config(next_analysis) if not (blacklist or whitelist): blacklist, whitelist = self._get_blacklist_and_whitelist_from_plugin(next_analysis) return blacklist, whitelist def _get_blacklist_and_whitelist_from_config(self, analysis_plugin: str) -> Tuple[List, List]: blacklist = read_list_from_config(self.config, analysis_plugin, 'mime_blacklist') whitelist = read_list_from_config(self.config, analysis_plugin, 'mime_whitelist') return blacklist, whitelist # ---- result collector functions ---- def _get_blacklist_and_whitelist_from_plugin(self, analysis_plugin: str) -> Tuple[List, List]: blacklist = self.analysis_plugins[analysis_plugin].MIME_BLACKLIST if hasattr(self.analysis_plugins[analysis_plugin], 'MIME_BLACKLIST') else [] whitelist = self.analysis_plugins[analysis_plugin].MIME_WHITELIST if hasattr(self.analysis_plugins[analysis_plugin], 'MIME_WHITELIST') else [] return blacklist, whitelist def start_result_collector(self): logging.debug('Starting result collector') self.result_collector_process = ExceptionSafeProcess(target=self.result_collector) self.result_collector_process.start() # ---- miscellaneous functions ---- def result_collector(self): while self.stop_condition.value == 0: nop = True for plugin in self.analysis_plugins: try: fw = self.analysis_plugins[plugin].out_queue.get_nowait() fw = self._handle_analysis_tags(fw, plugin) except Empty: pass else: nop = False if plugin in fw.processed_analysis: self.post_analysis(fw) self.check_further_process_or_complete(fw) if nop: sleep(int(self.config['ExpertSettings']['block_delay'])) def _handle_analysis_tags(self, fw, plugin): self.tag_queue.put(check_tags(fw, plugin)) return add_tags_to_object(fw, plugin) def check_further_process_or_complete(self, fw_object): if not fw_object.scheduled_analysis: logging.info('Analysis Completed:\n{}'.format(fw_object)) else: self.process_queue.put(fw_object) @staticmethod def _remove_unwanted_plugins(list_of_plugins): defaults = ['dummy_plugin_for_testing_only'] for plugin in defaults: list_of_plugins.remove(plugin) return list_of_plugins def check_exceptions(self): for _, plugin in self.analysis_plugins.items(): if plugin.check_exceptions(): return True for process in [self.schedule_process, self.result_collector_process]: if process.exception: logging.error('{}Exception in scheduler process {}{}'.format(bcolors.FAIL, bcolors.ENDC, process.name)) logging.error(process.exception[1]) terminate_process_and_childs(process) return True # Error here means nothing will ever get scheduled again. Thing should just break ! 
return False def _add_dependencies_recursively(self, scheduled_analyses: List[str]) -> List[str]: scheduled_analyses_set = set(scheduled_analyses) while True: new_dependencies = self._get_cumulative_remaining_dependencies(scheduled_analyses_set) if not new_dependencies: break scheduled_analyses_set.update(new_dependencies) return list(scheduled_analyses_set) def _get_cumulative_remaining_dependencies(self, scheduled_analyses: Set[str]) -> Set[str]: return { dependency for plugin in scheduled_analyses for dependency in self.analysis_plugins[plugin].DEPENDENCIES }.difference(scheduled_analyses)
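# The plugin ordering above works in two steps: _add_dependencies_recursively() pulls in
# all transitive DEPENDENCIES, and _smart_shuffle() only schedules a plugin once every
# dependency is already scheduled. The list is built front-to-back but consumed from the
# end via scheduled_analysis.pop(), which is why ready plugins are inserted at index 0
# and 'file_type' is appended last so that it runs first. The standalone sketch below
# illustrates the same resolution idea in plain run order; it is not the class's own
# code, and the plugin names in the example are made up:
def resolve_order(requested, dependencies):
    """Return plugins in an order in which dependencies run before dependants."""
    todo = set(requested)
    new = True
    while new:                                   # pull in transitive dependencies
        new = {d for p in todo for d in dependencies[p]} - todo
        todo |= new
    ordered, done = [], set()
    while todo:                                  # schedule whatever has its deps met
        ready = {p for p in todo if set(dependencies[p]) <= done}
        if not ready:
            raise ValueError('unresolvable dependencies: {}'.format(todo))
        ordered.extend(sorted(ready))
        done |= ready
        todo -= ready
    return ordered

# resolve_order(['b'], {'b': ['a'], 'a': []}) -> ['a', 'b']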
class TestRunnerManager(threading.Thread): init_lock = threading.Lock() def __init__(self, suite_name, tests, test_source_cls, browser_cls, browser_kwargs, executor_cls, executor_kwargs, stop_flag, pause_after_test=False, pause_on_unexpected=False, restart_on_unexpected=True, debug_info=None): """Thread that owns a single TestRunner process and any processes required by the TestRunner (e.g. the Firefox binary). TestRunnerManagers are responsible for launching the browser process and the runner process, and for logging the test progress. The actual test running is done by the TestRunner. In particular they: * Start the binary of the program under test * Start the TestRunner * Tell the TestRunner to start a test, if any * Log that the test started * Log the test results * Take any remedial action required e.g. restart crashed or hung processes """ self.suite_name = suite_name self.tests = tests self.test_source_cls = test_source_cls self.test_queue = None self.browser_cls = browser_cls self.browser_kwargs = browser_kwargs self.executor_cls = executor_cls self.executor_kwargs = executor_kwargs self.test_source = None # Flags used to shut down this thread if we get a sigint self.parent_stop_flag = stop_flag self.child_stop_flag = multiprocessing.Event() self.pause_after_test = pause_after_test self.pause_on_unexpected = pause_on_unexpected self.restart_on_unexpected = restart_on_unexpected self.debug_info = debug_info self.manager_number = next_manager_number() self.command_queue = Queue() self.remote_queue = Queue() self.test_runner_proc = None threading.Thread.__init__(self, name="Thread-TestrunnerManager-%i" % self.manager_number) # This is started in the actual new thread self.logger = None self.unexpected_count = 0 # This may not really be what we want self.daemon = True self.max_restarts = 5 self.browser = None def run(self): """Main loop for the TestManager. TestManagers generally receive commands from their TestRunner updating them on the status of a test. 
They may also have a stop flag set by the main thread indicating that the manager should shut down the next time the event loop spins.""" self.logger = structuredlog.StructuredLogger(self.suite_name) with self.browser_cls( self.logger, **self.browser_kwargs) as browser, self.test_source_cls( self.tests) as test_source: self.browser = BrowserManager(self.logger, browser, self.command_queue, no_timeout=self.debug_info is not None) self.test_source = test_source dispatch = { RunnerManagerState.before_init: self.start_init, RunnerManagerState.initalizing: self.init, RunnerManagerState.running: self.run_test, RunnerManagerState.restarting: self.restart_runner } self.state = RunnerManagerState.before_init() end_states = (RunnerManagerState.stop, RunnerManagerState.error) try: while not isinstance(self.state, end_states): f = dispatch.get(self.state.__class__) while f: self.logger.debug("Dispatch %s" % f.__name__) if self.should_stop(): return new_state = f() if new_state is None: break self.state = new_state self.logger.debug("new state: %s" % self.state.__class__.__name__) if isinstance(self.state, end_states): return f = dispatch.get(self.state.__class__) new_state = None while new_state is None: new_state = self.wait_event() if self.should_stop(): return self.state = new_state self.logger.debug("new state: %s" % self.state.__class__.__name__) except Exception as e: self.logger.error(traceback.format_exc(e)) raise finally: self.logger.debug( "TestRunnerManager main loop terminating, starting cleanup" ) clean = isinstance(self.state, RunnerManagerState.stop) self.stop_runner(force=not clean) self.teardown() self.logger.debug("TestRunnerManager main loop terminated") def wait_event(self): dispatch = { RunnerManagerState.before_init: {}, RunnerManagerState.initalizing: { "init_succeeded": self.init_succeeded, "init_failed": self.init_failed, }, RunnerManagerState.running: { "test_ended": self.test_ended, "wait_finished": self.wait_finished, }, RunnerManagerState.restarting: {}, RunnerManagerState.error: {}, RunnerManagerState.stop: {}, None: { "runner_teardown": self.runner_teardown, "log": self.log, "error": self.error } } try: command, data = self.command_queue.get(True, 1) except IOError: self.logger.error("Got IOError from poll") return RunnerManagerState.restarting(0) except Empty: if (self.debug_info and self.debug_info.interactive and self.browser.started and not self.browser.is_alive()): self.logger.debug("Debugger exited") return RunnerManagerState.stop() if (isinstance(self.state, RunnerManagerState.running) and not self.test_runner_proc.is_alive()): if not self.command_queue.empty(): # We got a new message so process that return # If we got to here the runner presumably shut down # unexpectedly self.logger.info("Test runner process shut down") if self.state.test is not None: # This could happen if the test runner crashed for some other # reason # Need to consider the unlikely case where one test causes the # runner process to repeatedly die self.logger.critical("Last test did not complete") return RunnerManagerState.error() self.logger.warning( "More tests found, but runner process died, restarting") return RunnerManagerState.restarting(0) else: f = (dispatch.get(self.state.__class__, {}).get(command) or dispatch.get(None, {}).get(command)) if not f: self.logger.warning("Got command %s in state %s" % (command, self.state.__class__.__name__)) return return f(*data) def should_stop(self): return self.child_stop_flag.is_set() or self.parent_stop_flag.is_set() def start_init(self): test, 
test_queue = self.get_next_test() if test is None: return RunnerManagerState.stop() else: return RunnerManagerState.initalizing(test, test_queue, 0) def init(self): assert isinstance(self.state, RunnerManagerState.initalizing) if self.state.failure_count > self.max_restarts: self.logger.error("Max restarts exceeded") return RunnerManagerState.error() result = self.browser.init() if result is Stop: return RunnerManagerState.error() elif not result: return RunnerManagerState.initalizing(self.state.test, self.state.test_queue, self.state.failure_count + 1) else: self.start_test_runner() def start_test_runner(self): # Note that we need to be careful to start the browser before the # test runner to ensure that any state set when the browser is started # can be passed in to the test runner. assert isinstance(self.state, RunnerManagerState.initalizing) assert self.command_queue is not None assert self.remote_queue is not None self.logger.info("Starting runner") executor_browser_cls, executor_browser_kwargs = self.browser.browser.executor_browser( ) args = (self.remote_queue, self.command_queue, self.executor_cls, self.executor_kwargs, executor_browser_cls, executor_browser_kwargs, self.child_stop_flag) self.test_runner_proc = Process(target=start_runner, args=args, name="Thread-TestRunner-%i" % self.manager_number) self.test_runner_proc.start() self.logger.debug("Test runner started") # Now we wait for either an init_succeeded event or an init_failed event def init_succeeded(self): assert isinstance(self.state, RunnerManagerState.initalizing) self.browser.after_init() return RunnerManagerState.running(self.state.test, self.state.test_queue) def init_failed(self): assert isinstance(self.state, RunnerManagerState.initalizing) self.browser.after_init() self.stop_runner(force=True) return RunnerManagerState.initalizing(self.state.test, self.state.test_queue, self.state.failure_count + 1) def get_next_test(self, test_queue=None): test = None while test is None: if test_queue is None: test_queue = self.test_source.get_queue() if test_queue is None: self.logger.info("No more tests") return None, None try: # Need to block here just to allow for contention with other processes test = test_queue.get(block=True, timeout=1) except Empty: pass return test, test_queue def run_test(self): assert isinstance(self.state, RunnerManagerState.running) assert self.state.test is not None self.logger.test_start(self.state.test.id) self.send_message("run_test", self.state.test) def test_ended(self, test, results): """Handle the end of a test. Output the result of each subtest, and the result of the overall harness to the logs. 
""" assert isinstance(self.state, RunnerManagerState.running) assert test == self.state.test # Write the result of each subtest file_result, test_results = results subtest_unexpected = False for result in test_results: if test.disabled(result.name): continue expected = test.expected(result.name) is_unexpected = expected != result.status if is_unexpected: self.unexpected_count += 1 self.logger.debug("Unexpected count in this thread %i" % self.unexpected_count) subtest_unexpected = True self.logger.test_status(test.id, result.name, result.status, message=result.message, expected=expected, stack=result.stack) # TODO: consider changing result if there is a crash dump file # Write the result of the test harness expected = test.expected() status = file_result.status if file_result.status != "EXTERNAL-TIMEOUT" else "TIMEOUT" is_unexpected = expected != status if is_unexpected: self.unexpected_count += 1 self.logger.debug("Unexpected count in this thread %i" % self.unexpected_count) if status == "CRASH": self.browser.log_crash(test.id) self.logger.test_end(test.id, status, message=file_result.message, expected=expected, extra=file_result.extra) restart_before_next = (test.restart_after or file_result.status in ("CRASH", "EXTERNAL-TIMEOUT") or ((subtest_unexpected or is_unexpected) and self.restart_on_unexpected)) if (self.pause_after_test or (self.pause_on_unexpected and (subtest_unexpected or is_unexpected))): self.logger.info("Pausing until the browser exits") self.send_message("wait") else: return self.after_test_end(restart_before_next) def wait_finished(self): assert isinstance(self.state, RunnerManagerState.running) # The browser should be stopped already, but this ensures we do any post-stop # processing self.logger.debug("Wait finished") return self.after_test_end(True) def after_test_end(self, restart): assert isinstance(self.state, RunnerManagerState.running) test, test_queue = self.get_next_test() if test is None: return RunnerManagerState.stop() if test_queue != self.state.test_queue: # We are starting a new group of tests, so force a restart restart = True if restart: return RunnerManagerState.restarting(test, test_queue) else: return RunnerManagerState.running(test, test_queue) def restart_runner(self): """Stop and restart the TestRunner""" assert isinstance(self.state, RunnerManagerState.restarting) self.stop_runner() return RunnerManagerState.initalizing(self.state.test, self.state.test_queue, 0) def log(self, action, kwargs): getattr(self.logger, action)(**kwargs) def error(self, message): self.logger.error(message) self.restart_runner() def stop_runner(self, force=False): """Stop the TestRunner and the browser binary.""" if self.test_runner_proc is None: return if self.test_runner_proc.is_alive(): self.send_message("stop") try: self.browser.stop(force=force) self.ensure_runner_stopped() finally: self.cleanup() def teardown(self): self.logger.debug("teardown in testrunnermanager") self.test_runner_proc = None self.command_queue.close() self.remote_queue.close() self.command_queue = None self.remote_queue = None def ensure_runner_stopped(self): self.logger.debug("ensure_runner_stopped") if self.test_runner_proc is None: return self.logger.debug("waiting for runner process to end") self.test_runner_proc.join(10) self.logger.debug("After join") if self.test_runner_proc.is_alive(): # This might leak a file handle from the queue self.logger.warning("Forcibly terminating runner process") self.test_runner_proc.terminate() self.test_runner_proc.join(10) else: self.logger.debug("Testrunner 
exited with code %i" % self.test_runner_proc.exitcode) def runner_teardown(self): self.ensure_runner_stopped() return RunnerManagerState.stop() def send_message(self, command, *args): self.remote_queue.put((command, args)) def cleanup(self): self.logger.debug("TestManager cleanup") if self.browser: self.browser.cleanup() while True: try: self.logger.warning(" ".join( map(repr, self.command_queue.get_nowait()))) except Empty: break
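# TestRunnerManager and its TestRunner process communicate over two multiprocessing
# Queues carrying (command, args) tuples: send_message() writes to remote_queue, and the
# runner's replies arrive on command_queue, where wait_event() routes them through its
# dispatch tables. The sketch below is a stripped-down illustration of the runner side
# of that protocol; it is not the actual TestRunner, and make_result() is a hypothetical
# helper standing in for real test execution:
def runner_loop(remote_queue, command_queue, stop_flag):
    """Execute commands sent by the manager and report results back."""
    while not stop_flag.is_set():
        try:
            command, args = remote_queue.get(timeout=1)
        except Empty:
            continue
        if command == "stop":
            break
        if command == "run_test":
            test = args[0]
            result = make_result(test)                       # hypothetical: run the test
            command_queue.put(("test_ended", (test, result)))
    command_queue.put(("runner_teardown", ()))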
                collection_queue.put((app, collection_name))

            status_map[app]['iteration_finished'] = str(datetime.datetime.now())
            logger.info('Finished publishing collections for app [%s] !' % app)

        # allow collection workers to finish
        wait_for(collection_workers, label='collection_workers', sleep_time=30)

        status_listener.terminate()

    except KeyboardInterrupt:
        logger.warning('Keyboard Interrupt, aborting...')

        collection_queue.close()
        collection_response_queue.close()

        [os.kill(super(EntityExportWorker, p).pid, signal.SIGINT) for p in collection_workers]
        os.kill(super(StatusListener, status_listener).pid, signal.SIGINT)

        [w.terminate() for w in collection_workers]
        status_listener.terminate()

    logger.info('entity_workers DONE!')


if __name__ == "__main__":