def __call__(self, cv_iterator, evaluator, fold_callback=None, n_jobs=None):
    """Evaluate all folds asynchronously and block until every one is done.

    The folds are dispatched through the object's ``async`` method, which is
    expected to return ``(pool, deferreds)`` and to invoke the callback it is
    given once per finished fold with a ``(fold_estimator, result)`` pair.

    Args:
        cv_iterator: Iterable of folds; it is materialised into a list.
        evaluator: Passed through unchanged to ``async``.
        fold_callback: Optional ``callable(fold_estimator, result)`` invoked
            in the calling thread once for every finished fold, in
            completion order. ``None`` disables reporting.
        n_jobs: Passed through unchanged to ``async``.

    Returns:
        The list of ``(fold_estimator, result)`` pairs in completion order.
    """
    condvar = Condition()
    results = []

    def _signal_cb(result):
        # Publish the result and wake the waiting caller.
        with condvar:
            results.append(result)
            condvar.notify()

    folds = list(cv_iterator)
    # ``async`` is a reserved word since Python 3.7, so ``self.async(...)``
    # no longer parses; resolving the attribute by name works everywhere.
    run_async = getattr(self, "async")
    pool, _deferreds = run_async(
        folds, evaluator, fold_callback=_signal_cb, n_jobs=n_jobs)
    pool.close()

    delivered = 0
    while delivered < len(folds):
        with condvar:
            # Re-check the predicate under the lock: a result that arrived
            # before we started waiting must not be missed.
            while len(results) <= delivered:
                condvar.wait()
            # Several results may have arrived since the last wakeup; report
            # all of them rather than only the most recent one.
            pending = results[delivered:]
        if fold_callback is not None:
            for fold_estimator, result in pending:
                fold_callback(fold_estimator, result)
        delivered += len(pending)

    pool.join()
    return results
class ProgressTracker(Thread):
    """Thread that forwards ``(file, progress)`` updates to callbacks.

    Producers call :meth:`update`; the thread wakes up and invokes every
    registered callback with the latest values. :meth:`complete` makes the
    thread leave its loop.
    """

    def __init__(self):
        super().__init__()
        self.lock = Condition()
        self.done = Value("H", 0)
        self.file = Value(c_wchar_p, "")
        self.progress = Value("d", 0.0)
        self.callbacks = []

    def update(self, file, progress):
        """Store the latest file name and progress and wake the thread."""
        with self.lock:
            # Write into the shared values; rebinding the attributes would
            # replace the Value objects and break get_lock()/.value later.
            with self.file.get_lock(), self.progress.get_lock():
                self.file.value = file
                self.progress.value = progress
            self.lock.notify_all()

    def complete(self):
        """Mark the tracker as finished and wake the thread so it can exit."""
        with self.done.get_lock():
            self.done.value = 1
        with self.lock:
            self.lock.notify_all()

    def registerUpdateCallback(self, callback):
        """Register ``callback(file, progress)`` to be called on each wakeup."""
        self.callbacks.append(callback)

    def run(self):
        """Dispatch updates to the callbacks until :meth:`complete` is called."""
        # The condition must be held while waiting. Holding it from the
        # ``done`` check through ``wait()`` also guarantees a concurrent
        # complete() cannot notify in between and be missed.
        with self.lock:
            while True:
                with self.done.get_lock():
                    if self.done.value:
                        break
                self.lock.wait()
                with self.file.get_lock(), self.progress.get_lock():
                    file = self.file.value
                    progress = self.progress.value
                for callback in self.callbacks:
                    callback(file, progress)
def getDataSet(time0, users, rgp):
    """Run one ``MultipRecord`` worker per user, at most two at a time.

    Args:
        time0: Passed through to every ``MultipRecord``.
        users: Table indexed by user; the ``"memberSince"`` column is read
            for each index entry via ``users.loc``.
        rgp: Passed through to every ``MultipRecord``.

    Workers receive ``plt_queue`` and ``plt_cond``; presumably each one puts
    its user id on the queue and notifies the condition when it finishes
    (NOTE(review): confirm against ``MultipRecord``).
    """
    start = time.time()
    plt_queue = Queue()
    plt_cond = Condition()
    pool = []
    max_process = 2
    for u in users.index:
        if len(pool) >= max_process:
            with plt_cond:
                if plt_queue.empty():
                    plt_cond.wait()
                while not plt_queue.empty():
                    uq = plt_queue.get()
                    pos = next(
                        (k for k, worker in enumerate(pool) if worker.u == uq),
                        None)
                    if pos is None:
                        # Unknown id: previously the scan fell through and
                        # joined/removed an unrelated worker instead.
                        continue
                    pool[pos].join()
                    del pool[pos]
        m = users.loc[u, "memberSince"]
        p = MultipRecord(rgp, u, m, time0, plt_queue, plt_cond)
        pool.append(p)
        p.start()
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("subProcess start.....")
    for p in pool:
        p.join()
    end = time.time()
    print("time for make Reg trainSet: %.3f s" % (end - start))
class ThreeFM(Process):
    """Process that collects one ``stat`` result per month of a year.

    Results accumulate in ``self.r`` and the list is published under key
    ``0`` of the mapping ``d`` after every append.
    """

    def __init__(self, d, name, year):
        super().__init__()
        self.d = d
        self.name = name
        self.year = year
        self.r = []
        # Both conditions share one re-entrant mutex.
        shared_mutex = RLock()
        self._mutex = shared_mutex
        self._empty = Condition(shared_mutex)
        self._full = Condition(shared_mutex)

    def run(self):
        """Submit one :meth:`put` per month (1..12) to a two-thread pool."""
        with ThreadPoolExecutor(max_workers=2) as executor:
            for month in range(1, 13):
                first_of_month = dat(self.year, month, 1)
                executor.submit(self.put, self.name, first_of_month)

    def __str__(self):
        published = self.d.values()
        return str(published[0])

    def put(self, name, date):
        """Append ``stat(date, name)`` to the results and publish them."""
        self._full.acquire()
        try:
            # Block while twelve or more results are already stored.
            while not len(self.r) < 12:
                self._full.wait()
            entry = stat(date, name)
            self.r.append(entry)
            self.d[0] = self.r
            self._empty.notify()
        finally:
            self._full.release()

    def get(self):
        """Return the first value stored in the mapping ``d``."""
        published = self.d.values()
        return published[0]
class ServerProcess(Process):
    """Child process that runs a ``Server`` until :meth:`stop` is called.

    The parent can block on :meth:`wait_for_start` (or the synchronous
    variant) until the server has been entered.
    """

    def __init__(self):
        super().__init__()
        self.url = f"opc.tcp://127.0.0.1:{port_num}"
        self.cond = Condition()
        self.stop_ev = Event()
        # Sticky "server is up" flag. A bare notify is lost when it fires
        # before the parent starts waiting; the flag makes the wait safe.
        self._started = Event()

    async def run_server(self, url):
        """Set up the server at ``url`` and serve until the stop event is set."""
        srv = Server()
        srv.set_endpoint(url)
        await srv.init()
        await add_server_methods(srv)
        await add_server_custom_enum_struct(srv)
        async with srv:
            with self.cond:
                self._started.set()
                self.cond.notify_all()
            while not self.stop_ev.is_set():
                await asyncio.sleep(1)
        await srv.stop()

    def stop(self):
        """Ask the server loop to exit; it polls the event once per second."""
        self.stop_ev.set()

    async def wait_for_start(self):
        """Await server start-up without blocking the caller's event loop."""
        with ThreadPoolExecutor() as pool:
            await asyncio.get_running_loop().run_in_executor(
                pool, self.wait_for_start_sync)

    def wait_for_start_sync(self):
        """Block until the server has signalled that it is running."""
        with self.cond:
            # Predicate loop: returns at once if the server already started.
            while not self._started.is_set():
                self.cond.wait()

    def run(self):
        """Process entry point: run :meth:`run_server` on a fresh event loop."""
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(self.run_server(self.url))
        finally:
            loop.close()
class Producer(Process):
    """Process that reads chunks from a file, preprocesses and queues them.

    Batches are appended to ``batch_queue``, which is shared with a
    ``PipeOutThread`` through ``condition``; presumably that thread drains
    the queue into ``prod_end`` (NOTE(review): confirm against
    ``PipeOutThread``).
    """

    def __init__(self, prod_end, fname, SHARED_QUEUE_SIZE_LIMIT):
        super(Producer, self).__init__()
        self.prod_end = prod_end
        self.fp = open(fname, 'r')
        self.SHARED_QUEUE_SIZE_LIMIT = SHARED_QUEUE_SIZE_LIMIT
        self.batch_queue = []
        self.condition = Condition()
        self.pipe_out_thread = PipeOutThread(prod_end, self.condition,
                                             self.SHARED_QUEUE_SIZE_LIMIT,
                                             self.batch_queue)

    def _preprocess(self, data):
        """Burn CPU in a fixed-length countdown, then return ``data`` unchanged."""
        N = 1000 * 1000 * 10 * 3
        while N > 0:
            N -= 1
        return data

    def _is_shared_queue_full(self):
        """Return True when the pipe-out thread reports a full queue."""
        return (self.pipe_out_thread.get_queue_size()
                >= self.SHARED_QUEUE_SIZE_LIMIT)

    def _preprocess_and_put_in_queue(self, data):
        """Wait for free space, then preprocess ``data`` and enqueue it."""
        with self.condition:
            # Re-check after every wakeup; being woken does not guarantee
            # that space is available.
            while self._is_shared_queue_full():
                self.condition.wait()
            self.batch_queue.append(self._preprocess(data))
            self.condition.notify()

    def _read_data(self, i, dummy=False):
        """Read one chunk at a random offset (or return a fixed dummy value)."""
        if dummy:
            return 'soumya'
        offset = random.randint(5, 16) * GB
        self.fp.seek(offset)
        return self.fp.read(CHUNK_SIZE_TO_READ)

    def run(self):
        """Process entry point: produce ``BATCHES`` batches, then join the thread."""
        self.pipe_out_thread.start()
        try:
            for i in range(BATCHES):
                data = self._read_data(i)
                self._preprocess_and_put_in_queue(data)
        finally:
            # The handle opened in __init__ was never closed before.
            self.fp.close()
        self.pipe_out_thread.join()
def test_watch_directory():
    """Check that files written to the watch directory get rendered.

    Starts ``reloader.watch_directory`` in a child process, waits for it to
    signal the shared condition, writes two templates (one in a
    sub-directory) and asserts that the matching ``.html`` files appear.
    """

    def _cleanup(path):
        # Remove everything except the '.nothing' placeholder file.
        for entry in listdir(path):
            target = join(path, entry)
            if isdir(target):
                rmtree(target)
                continue
            if entry != '.nothing':
                unlink(target)

    sample_directory = dirname(realpath(__file__)) + '/sample/'
    watch_directory = sample_directory + 'watch/'
    render_directory = sample_directory + 'render/'
    template_directory = sample_directory + 'templates/'

    with open(template_directory + 'haml.tmpl', 'r') as template_file:
        sample_template = template_file.read()

    condition = Condition()
    watcher = Process(
        target=reloader.watch_directory,
        args=(watch_directory, render_directory, condition),
    )
    condition.acquire()
    watcher.start()
    condition.wait()
    try:
        with open(watch_directory + 'test.haml', 'w') as out:
            out.write(sample_template)

        subdir = watch_directory + 'test_subdir/'
        try:
            mkdir(subdir)
        except OSError:
            # Only tolerate the failure when the directory already exists.
            if not isdir(subdir):
                raise

        with open(subdir + 'test_two.haml', 'w') as out:
            out.write(sample_template)

        # Give the watcher time to render.
        sleep(1)
        assert_true(exists(render_directory + 'test.html'))
        assert_true(exists(render_directory + 'test_subdir/test_two.html'))
    finally:
        condition.release()
        watcher.terminate()
        watcher.join()
        sleep(1)
        for directory in (watch_directory, render_directory):
            _cleanup(directory)
def genSimTask(path, days=0, thd=0.8):
    """Compute similar tasks for the first 100 challenges and merge the results.

    One ``SimTasks`` worker is started per task, with at most 64 alive at a
    time. Each worker is given an output file; once all have been joined,
    the per-task files are loaded and written as one dictionary to
    ``path + "simTasks/sim_tasks_<days>.data"``.

    Args:
        path: Directory prefix (concatenated directly, so it must end with a
            path separator) containing ``challenges.data``.
        days: When positive, candidates are limited to those whose
            ``RegStart`` is at most ``days`` before the task's ``RegStart``.
        thd: Passed through to ``SimTasks``.
    """
    start = time.time()

    def _task_file(task_id):
        # Per-task result file written by the worker for ``task_id``.
        return path + "simTasks%d/sim_tasks_%d.data" % (days, task_id)

    pool = []
    selTasks = pd.read_pickle(path + "challenges.data")
    selTasks = selTasks.iloc[:100, :]
    taskIndex = selTasks.index
    maxProcess = 64
    sel_queue = Queue()
    sel_cond = Condition()
    for i, task_id in enumerate(taskIndex):
        if len(pool) >= maxProcess:
            with sel_cond:
                if sel_queue.empty():
                    sel_cond.wait()
                while not sel_queue.empty():
                    name0 = sel_queue.get()
                    pos = next(
                        (k for k, worker in enumerate(pool)
                         if worker.task0.name == name0),
                        None)
                    if pos is None:
                        # Unknown name: previously the scan fell through and
                        # joined/removed an unrelated worker instead.
                        continue
                    pool[pos].join()
                    del pool[pos]
        task0 = selTasks.iloc[i]
        cand_tasks = selTasks[selTasks["RegStart"] <= task0["SubEnd"]]
        if days > 0:
            cand_tasks = cand_tasks[
                task0["RegStart"] - cand_tasks["RegStart"] <= days]
        proc = SimTasks(_task_file(task_id), task0, cand_tasks, thd,
                        sel_queue, sel_cond)
        proc.start()
        pool.append(proc)
    for proc in pool:
        proc.join()
    end = time.time()
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("time for generate sim_task %.3f s." % (end - start))

    simTaskContainer = {}
    for taskI in taskIndex:
        # Load each task's own file; the old code reused the stale loop index
        # ``i`` and therefore read the last task's file for every entry.
        with open(_task_file(taskI), "rb") as f:
            simTaskContainer[taskI] = pickle.load(f)
    sum_path = path + "simTasks/sim_tasks_%d.data" % days
    with open(sum_path, "wb") as f:
        pickle.dump(simTaskContainer, f)
class Barrier(object):
    """Reusable barrier for ``num_threads`` participants.

    Every caller of :meth:`wait` blocks until the last participant arrives;
    that one resets the counter and wakes everybody.
    """

    def __init__(self, num_threads):
        self.num_threads = num_threads
        # Number of participants still expected in the current round.
        self.threads_left = Value('i', num_threads, lock=True)
        self.mutex = Lock()
        self.waitcond = Condition(self.mutex)

    def wait(self):
        """Block until all participants have called ``wait``."""
        with self.mutex:
            remaining = self.threads_left.value - 1
            self.threads_left.value = remaining
            if remaining != 0:
                # Not the last one: sleep until the final arrival notifies.
                self.waitcond.wait()
                return
            # Last arrival: re-arm the barrier and release the others.
            self.threads_left.value = self.num_threads
            self.waitcond.notify_all()
def looking_for_positions(
        self,
        position_id: int,
        cond_looking_for_positions: multiprocessing.Condition,
        mutex_positions: multiprocessing.Lock,
        mutex_end_update: multiprocessing.Lock,
        event_compute_results: multiprocessing.Event,
        positions,
        update_ok
):
    """
    Worker loop: on every wakeup, grow one position set by its neighbours.

    Each time ``cond_looking_for_positions`` is notified, every position
    currently stored under ``positions[position_id]`` contributes its
    surrounding cells (clipped to ``[0, max_x)`` x ``[0, max_y)``) that are
    not in the set yet. The grown array is written back under
    ``mutex_positions`` and ``self.end_update`` is called.

    :param position_id: key of the position set handled by this worker
    :param cond_looking_for_positions: condition waited on before each pass
    :param mutex_positions: lock held while writing the result back
    :param mutex_end_update: forwarded to ``self.end_update``
    :param event_compute_results: forwarded to ``self.end_update``
    :param positions: indexable container of 2-D arrays of ``[x, y]`` rows
    :param update_ok: forwarded to ``self.end_update``
    """
    while self.__is_alive:
        with cond_looking_for_positions:
            cond_looking_for_positions.wait()

        seeds = positions[position_id]
        # Only the positions present at wakeup are expanded; ``known`` grows
        # while ``seeds`` stays untouched.
        known = seeds
        for x, y in seeds:
            y_lo = max(y - 1, 0)
            y_hi = min(y + 2, self.max_y)
            x_lo = max(x - 1, 0)
            x_hi = min(x + 2, self.max_x)

            neighbours = []
            for i in range(x_lo, x_hi):
                for j in range(y_lo, y_hi):
                    if i == x and j == y:
                        continue  # skip the centre cell itself
                    neighbours.append([i, j])

            for candidate in np.array(neighbours):
                row_matches = np.equal(known, candidate).all(axis=-1)
                if any(row_matches):
                    continue
                known = np.concatenate((known, candidate[np.newaxis]))

        with mutex_positions:
            positions[position_id] = known
        self.end_update(mutex_end_update, event_compute_results, update_ok)
def run(frame: Array, target_in_sight: Condition):
    """Archive the shared frame as a JPEG every time a target is signalled.

    Loops forever: waits on ``target_in_sight``, then, if the current hour is
    strictly between 7 and 18, saves the frame buffer under
    ``/mnt/nas/data/birdthings/<date>/<hour>/<time>.jpeg``.
    """
    archive_root = Path("/mnt/nas/data/birdthings")
    archive_root.mkdir(exist_ok=True, parents=True)

    while True:
        with target_in_sight:
            target_in_sight.wait()

        with frame.get_lock():
            now = datetime.now()
            if now.hour <= 7 or now.hour >= 18:
                # Outside the archiving window.
                continue

            datepart, timepart = str(now).split(" ")
            hour_dir = timepart.split(":")[0]
            dest = archive_root / datepart / hour_dir / (timepart + ".jpeg")
            dest.parent.mkdir(exist_ok=True, parents=True)

            image = Image.frombytes("RGB", camera.RESOLUTION, frame.get_obj())
            image.save(dest, format="jpeg")
            logging.info(f'archived image {dest}')
class Monitor:
    """Bounded circular buffer of ints guarded by a monitor.

    ``buffer`` and ``freePositions`` are shared objects; the read and write
    cursors are plain attributes and therefore local to each process.
    """

    def __init__(self, bufferSize):
        # Shared Data
        self.buffer = Array('i', bufferSize)
        self.bufferSize = bufferSize
        self.freePositions = Value('i', bufferSize)
        # Local Data
        self.nextRead = 0
        self.nextWrite = 0
        # Control Data
        self.mutex = Lock()
        self.items = Condition(self.mutex)
        self.positions = Condition(self.mutex)

    def produce(self, value):
        """Store ``value`` in the buffer, blocking while the buffer is full."""
        # Entering either condition acquires the shared mutex and releases
        # it again even if an exception is raised.
        with self.positions:
            # Re-check after every wakeup: another producer may have taken
            # the slot that was just freed.
            while self.freePositions.value == 0:
                self.positions.wait()
            self.buffer[self.nextWrite] = value
            self.nextWrite = (self.nextWrite + 1) % self.bufferSize
            self.freePositions.value -= 1
            self.items.notify()

    def consume(self):
        """Remove and return the oldest value, blocking while the buffer is empty."""
        with self.items:
            while self.freePositions.value == self.bufferSize:
                self.items.wait()
            newItem = self.buffer[self.nextRead]
            self.nextRead = (self.nextRead + 1) % self.bufferSize
            self.freePositions.value += 1
            self.positions.notify()
        return newItem
class OrderedQueue(object):
    """Queue whose items are inserted and handed out in index order.

    ``put`` blocks until it is the given index's turn (indices start at 0);
    ``get`` returns ``(index, elem)`` pairs in increasing index order,
    parking out-of-order items in a manager-backed list.
    """

    def __init__(self, maxsize):
        self.queue = Queue(maxsize=maxsize)
        self.lock = Lock()
        self.getlock = Lock()
        # Index of the last item inserted / the next item to hand out.
        self.putcounter = Value('i', -1)
        self.getcounter = Value('i', 0)
        self.cond = Condition(self.lock)
        self.manager = Manager()
        self.getlist = self.manager.list()

    def put(self, index, elem):
        """Insert ``elem`` once every smaller index has been inserted."""
        self.lock.acquire()
        try:
            while self.putcounter.value + 1 != index:
                self.cond.wait()
            self.queue.put((index, elem))
            self.putcounter.value += 1
            self.cond.notify_all()
        finally:
            self.lock.release()

    def get(self):
        """Return the ``(index, elem)`` pair with the next expected index."""
        with self.getlock:
            # First look among the items parked by earlier calls.
            for slot, (index, elem) in enumerate(self.getlist):
                if index != self.getcounter.value:
                    continue
                self.getcounter.value += 1
                del self.getlist[slot]
                return (index, elem)

            # Otherwise drain the queue until the expected index shows up.
            while True:
                index, elem = self.queue.get()
                if index == self.getcounter.value:
                    self.getcounter.value += 1
                    return (index, elem)
                self.getlist.append((index, elem))

    def close(self):
        """Close the underlying queue."""
        return self.queue.close()

    def qsize(self):
        """Return the size reported by the underlying queue."""
        return self.queue.qsize()
class RWLock:
    """Minimal readers-writer lock built on one condition variable.

    Readers only bump a counter; a writer keeps the condition's lock for the
    whole write, which also keeps new readers out while it is active.
    """

    def __init__(self):
        self.cond = Condition()
        self.readers = 0

    def read_acquire(self):
        """Register one more active reader."""
        with self.cond:
            self.readers += 1

    def read_release(self):
        """Unregister a reader and wake waiting writers when none remain."""
        with self.cond:
            self.readers -= 1
            if self.readers == 0:
                self.cond.notify_all()

    def write_acquire(self):
        """Block until there are no active readers, then keep the lock held.

        The lock stays acquired on return and must be released with
        :meth:`write_release`.
        """
        self.cond.acquire()
        try:
            # Re-check after every wakeup: a reader may have registered
            # between the notification and this writer regaining the lock.
            while self.readers > 0:
                self.cond.wait()
        except BaseException:
            # Do not leave the lock held if the wait is interrupted.
            self.cond.release()
            raise

    def write_release(self):
        """Release the lock taken by :meth:`write_acquire`."""
        self.cond.release()
class JoinableQueue(Queue):
    """Queue subclass that tracks unfinished tasks so callers can ``join()``.

    ``_unfinished_tasks`` is a semaphore used as a counter: ``put`` releases
    it once per item and ``task_done`` acquires it once per completed item.
    ``_cond`` is notified when the counter reaches zero.

    NOTE(review): ``_closed``, ``_sem``, ``_notempty``, ``_thread``,
    ``_start_thread`` and ``_buffer`` are not defined in this block;
    presumably they are inherited from ``Queue`` -- confirm.
    """

    def __init__(self, maxsize=0):
        Queue.__init__(self, maxsize)
        # Counter of items put but not yet marked done; starts at zero.
        self._unfinished_tasks = Semaphore(0)
        self._cond = Condition()

    def __getstate__(self):
        # Append this class's two extra fields to the parent's state tuple.
        return Queue.__getstate__(self) + (self._cond, self._unfinished_tasks)

    def __setstate__(self, state):
        # The last two entries are ours (same order as in __getstate__); the
        # rest is handed to the parent.
        Queue.__setstate__(self, state[:-2])
        self._cond, self._unfinished_tasks = state[-2:]

    def put(self, obj, block=True, timeout=None):
        """Enqueue ``obj`` and count it as an unfinished task.

        Raises ``Full`` when ``self._sem`` cannot be acquired with the given
        ``block``/``timeout``.
        """
        assert not self._closed
        if not self._sem.acquire(block, timeout):
            raise Full

        # Both locks are held so that buffering the item and incrementing
        # the unfinished-task counter happen together.
        with self._notempty, self._cond:
            if self._thread is None:
                self._start_thread()
            self._buffer.append(obj)
            self._unfinished_tasks.release()
            self._notempty.notify()

    def task_done(self):
        """Mark one previously enqueued item as processed.

        Raises ``ValueError`` when called more often than items were put.
        """
        with self._cond:
            # Non-blocking acquire: failure means the counter is already zero.
            if not self._unfinished_tasks.acquire(False):
                raise ValueError('task_done() called too many times')
            if self._unfinished_tasks._semlock._is_zero():
                # Last outstanding task finished: wake everyone in join().
                self._cond.notify_all()

    def join(self):
        """Wait on ``_cond`` if unfinished tasks remain when called."""
        with self._cond:
            # Single check, not a loop: returns after the first wakeup.
            if not self._unfinished_tasks._semlock._is_zero():
                self._cond.wait()
class BARRIER:
    """Barrier for ``n`` participants with an explicit release step.

    The first ``n - 1`` callers of :meth:`acquire` block and eventually
    return ``False``. The ``n``-th caller returns ``True`` immediately and
    is expected to call :meth:`release` to let the others continue.
    """

    def __init__(self, n):
        self.n = n
        self.i = Value('i', 0)
        self.lock = Lock()
        self.condition = Condition()
        # Incremented by every release(); lets a waiter tell a real release
        # of its own round apart from any other wakeup.
        self.generation = Value('i', 0)

    def acquire(self):
        """Arrive at the barrier.

        Returns:
            ``True`` for the last of ``n`` arrivals (which does not block),
            ``False`` for every other caller once :meth:`release` has run.
        """
        # The condition's lock is held from the arrival count through the
        # wait. Otherwise release() could notify between the two steps and
        # this caller would sleep forever.
        with self.condition:
            with self.lock:
                self.i.value += 1
                if self.n == self.i.value:
                    self.i.value = 0
                    return True
                my_generation = self.generation.value
            while self.generation.value == my_generation:
                self.condition.wait()
        return False

    def release(self):
        """Wake every participant currently blocked in :meth:`acquire`."""
        with self.condition:
            self.generation.value += 1
            self.condition.notify_all()
class DataLoader:
    """
    Class for loading data

    Attributes:
        num_processor: an integer indicating the number of processors for loading the data, normally 4 is enough
        capacity: an integer indicating the capacity of the data load queue, default set to 10
        batch_size: an integer indicating the batch size for each extraction from the data load queue
        phase: a string indicating the phase of the data loading process, can only be 'train' or 'test'
    """

    def __init__(self,
                 num_processor,
                 batch_size,
                 phase,
                 batch_idx_init=0,
                 data_ids_init=train_ids,
                 capacity=10):
        self.num_processor = num_processor
        self.batch_size = batch_size
        self.data_load_capacity = capacity
        self.manager = Manager()
        # Guards cur_batch / data_ids.
        self.batch_lock = Lock()
        # Guards data_load_queue; both conditions share it.
        self.mutex = Lock()
        self.cv_full = Condition(self.mutex)
        self.cv_empty = Condition(self.mutex)
        self.data_load_queue = self.manager.list()
        self.cur_batch = self.manager.list([batch_idx_init])
        self.processors = []

        if phase == 'train':
            self.data_ids = self.manager.list(data_ids_init)
        elif phase == 'test':
            self.data_ids = self.manager.list(test_ids)
        else:
            raise ValueError('Could not set phase to %s' % phase)

    def __load__(self):
        """Worker loop: claim the next batch of ids, load it and enqueue it."""
        while True:
            with self.batch_lock:
                batch = self.cur_batch[0]
                image_ids = self.data_ids[batch * self.batch_size:
                                          (batch + 1) * self.batch_size]
                next_batch = batch + 1
                # Wrap around and reshuffle once the batch after the next
                # one would reach the end of the id list.
                if (next_batch + 1) * self.batch_size >= len(self.data_ids):
                    next_batch = 0
                    random.shuffle(self.data_ids)
                self.cur_batch[0] = next_batch

            # Load outside the queue mutex so that several workers can load
            # in parallel and get_batch() is not blocked while data is read.
            batch_data = get_data(image_ids)

            with self.cv_full:
                # Predicate loop: with several workers a wakeup does not
                # guarantee that there is still room.
                # NOTE(review): the strict ``>`` lets the queue grow to
                # capacity + 1 entries; kept for backward compatibility.
                while len(self.data_load_queue) > self.data_load_capacity:
                    self.cv_full.wait()
                self.data_load_queue.append(batch_data)
                self.cv_empty.notify()

    def start(self):
        """Start ``num_processor`` worker processes running the load loop."""
        for _ in range(self.num_processor):
            p = Process(target=self.__load__)
            p.start()
            self.processors.append(p)

    def get_batch(self):
        """Block until a batch is available, then remove and return it."""
        with self.cv_empty:
            while len(self.data_load_queue) == 0:
                self.cv_empty.wait()
            batch_data = self.data_load_queue.pop()
            self.cv_full.notify()
        return batch_data

    def get_status(self):
        """Return a snapshot ``{'batch_idx': int, 'data_ids': list}``."""
        with self.batch_lock:
            # Copy while holding the lock so that index and ids are from the
            # same moment (a worker may reshuffle right after).
            current_cur_batch = int(self.cur_batch[0])
            current_data_ids = list(self.data_ids)
        return {
            'batch_idx': current_cur_batch,
            'data_ids': current_data_ids
        }

    def stop(self):
        """Terminate all worker processes and reap them."""
        for p in self.processors:
            p.terminate()
        for p in self.processors:
            p.join()
class TProcessPoolServer(TServer):
    """Server that handles clients in a fixed pool of worker processes.

    ``serve()`` starts ``numWorkers`` daemon processes that each accept and
    serve clients, then blocks until ``stop()`` is called.
    """

    def __init__(self, *args):
        TServer.__init__(self, *args)
        self.numWorkers = 10
        self.workers = []
        # Shared flag polled by the workers; False tells them to exit.
        self.isRunning = Value('b', False)
        self.stopCondition = Condition()
        self.postForkCallback = None

    def setPostForkCallback(self, callback):
        """Set a callable run at the start of every worker process.

        Raises:
            TypeError: if ``callback`` is not callable.
        """
        if not callable(callback):
            raise TypeError("This is not a callback!")
        self.postForkCallback = callback

    def setNumWorkers(self, num):
        """Set the number of worker processes that should be created"""
        self.numWorkers = num

    def workerProcess(self):
        """Loop accepting clients from the server transport and serving them"""
        if self.postForkCallback:
            self.postForkCallback()

        while self.isRunning.value:
            try:
                client = self.serverTransport.accept()
                if not client:
                    continue
                self.serveClient(client)
            except (KeyboardInterrupt, SystemExit):
                return 0
            except Exception as x:
                logger.exception(x)

    def serveClient(self, client):
        """Process requests from one client until its transport fails"""
        itrans = self.inputTransportFactory.getTransport(client)
        otrans = self.outputTransportFactory.getTransport(client)
        iprot = self.inputProtocolFactory.getProtocol(itrans)
        oprot = self.outputProtocolFactory.getProtocol(otrans)

        try:
            while True:
                self.processor.process(iprot, oprot)
        except TTransportException:
            pass
        except Exception as x:
            logger.exception(x)
        finally:
            # Close the transports even when the worker is being interrupted.
            itrans.close()
            otrans.close()

    def serve(self):
        """Start the workers and block until ``stop()`` is called"""
        # Shared state that tells the workers to exit once it becomes False.
        self.isRunning.value = True

        self.serverTransport.listen()

        for i in range(self.numWorkers):
            try:
                w = Process(target=self.workerProcess)
                w.daemon = True
                w.start()
                self.workers.append(w)
            except Exception as x:
                logger.exception(x)

        # Wait until stop() clears the flag. The condition is used as a
        # context manager so its lock is released again (it used to be
        # acquired on every pass and never released), and the flag is the
        # wait predicate so a stop() that fires early is not missed.
        while True:
            try:
                with self.stopCondition:
                    while self.isRunning.value:
                        self.stopCondition.wait()
                break
            except (SystemExit, KeyboardInterrupt):
                break
            except Exception as x:
                logger.exception(x)

        self.isRunning.value = False

    def stop(self):
        """Clear the running flag and wake the process blocked in ``serve()``"""
        self.isRunning.value = False
        with self.stopCondition:
            self.stopCondition.notify()
class Task(object):
    """
    Task is a unit of execution contained within a workflow. A task accepts
    zero or more inputs and outputs zero or more results. Multiple results
    are returned in the form of a python tuple.

    Attributes:
        name: Task name. Defaults to the function name which corresponds to the task
        id: Task UUID
        graph: Task graph which the task is bound to
        _runner: Executable function associated with the task. This gets
            executed at runtime. May include additional code than user
            provided task logic (i.e: pre and post task handlers)
        _fn: User given function for the task (this is a python code object)
        _sig: Original task (function) signature
        _args: Task (function) arguments
        _latch: Task trigger latch. Gets triggered once all non immediate
            inputs have been received
        triggered: Task input monitor. Gets notified and task woken up once
            all non immediate inputs have been received. Used in
            conjunction with _latch
        inputs: in-ports of the task. A dictionary with input argument name
            as key and an in-port object as value
        outputs: out-ports of the task. A dictionary with output name as key
            and an out-port object as value
        edges: Output edges of the task
        is_fusee: True if this task is contained within a FusedTask
        is_fused: Flag initialised to False here and not otherwise used in
            this class
        is_source: True if this task is a source of the associated task graph
        is_sink: True if this task is a sink of the associated task graph
        is_staging: True if this task is a staging task generated by the
            task graph compiler
        is_transform: True if this task is a data transformation task
            generated by the task graph compiler
    """

    def __init__(self, runner, fn, sig, args, kwargs):
        self.name = fn.__name__
        self.id = None
        self.graph = None
        self._runner = runner
        self._fn = fn
        self._sig = sig
        self._args = {}

        # Runtime control
        self._latch = Value('i', 0)
        self.triggered = Condition()

        # I/O
        self.inputs = {}
        self.outputs = {}
        self.edges = []

        # Flags
        self.is_fusee = False
        self.is_fused = False
        self.is_source = False
        self.is_sink = False
        self.is_staging = False
        self.is_transform = False

        self._set_inputs(fn, args, kwargs)
        self._set_outputs(fn, args)

    def __repr__(self):
        return self.name

    def __str__(self):
        return self.name

    @staticmethod
    def _upstream(value):
        """Return ``(parent_task, outport)`` feeding ``value``, or None.

        A ``Task`` argument is fed by the parent's only out-port; a
        ``Tasklet`` argument by the out-port in its slot of the parent.
        Any other value is an immediate and has no upstream.
        """
        if isinstance(value, Task):
            # Get the only out-port of the parent task
            return value, next(iter(value.outputs.values()))
        if isinstance(value, Tasklet):
            parent = value.parent
            return parent, parent.outputs[value.out_slot_in_parent]
        return None

    def _set_inputs(self, fn, args, kwargs):
        """Bind the call arguments and create one in-port per parameter.

        Arguments that are tasks/tasklets are wired to their producer with
        an ``Edge`` and each one increments the latch.
        """
        sig = self._sig
        params = sig.parameters
        ba = sig.bind(*args, **kwargs)
        ba.apply_defaults()
        arguments = ba.arguments

        if len(params) != len(arguments):
            raise Exception(
                "{} accepts {} arguments. But {} were given".format(
                    self.name, len(params), len(arguments)))

        for pname, param in params.items():
            value = arguments[pname]
            py_type = param.annotation

            if py_type is not param.empty:
                param_type = get_type(py_type)
            else:
                # Unannotated: take the type of the producing out-port, or
                # fall back to the runtime type of an immediate value.
                upstream = self._upstream(value)
                if upstream is not None:
                    param_type = upstream[1].type
                else:
                    param_type = get_type(type(value))

            self._args[pname] = value
            inport = Backend.get_current_backend().get_port(
                param_type, pname, -1, self)
            self.inputs[pname] = inport

            upstream = self._upstream(value)
            if upstream is not None:
                parent, outport = upstream
                inport.is_immediate = False
                parent.edges.append(Edge(outport, inport))
                # Unsynchronized access here since we know graph generation
                # is single threaded
                self._latch.value += 1

    def _set_outputs(self, fn, args):
        """Create the out-ports from the signature's return annotation."""
        sig = self._sig
        backend = Backend.get_current_backend
        if isinstance(sig.return_annotation, tuple):
            # One out-port per element of the annotated tuple.
            for index, ret_type in enumerate(sig.return_annotation):
                self.outputs[str(index)] = backend().get_port(
                    get_type(ret_type), str(index), index, self)
            return

        ret_type = sig.return_annotation
        if ret_type is sig.empty:
            # [FIXME] Code debt - Currently we have two dynamic types. One
            # for builtins and one for files. Here I just assume if we
            # an untyped return it is a file type. This needs fixing if we
            # want to return any untyped builtins as well.
            ret_type = 'anyfile'
        elif isinstance(ret_type, str) and ret_type.startswith('@args'):
            # Get the actual type from the task args input
            # arg index follows '@args' prefix. Need to make it zero indexed
            # NOTE(review): only one digit is read, so at most nine
            # arguments can be addressed this way.
            arg_index = int(ret_type[5]) - 1
            # arg accessor follows the arg_index
            arg_accessor = ret_type[7:]
            ret_type = getattr(args[arg_index], arg_accessor)
        type_obj = get_type(ret_type)

        self.outputs[str(0)] = backend().get_port(type_obj, str(0), 0, self)

    def get_parents(self):
        """Return the distinct tasks feeding this task's non-immediate inputs.

        Raises:
            Exception: if a connected in-port's source has no task reference.
        """
        parents = set()
        for inport in self.inputs.values():
            if inport.is_immediate or inport.inport_edge is None:
                continue
            parent = inport.inport_edge.source.task_ref
            if parent is None:
                raise Exception("Null parent for out-port")
            parents.add(parent)
        return list(parents)

    def get_children(self):
        """Return the distinct tasks fed by this task (none for a sink)."""
        if self.is_sink:
            return []
        return list({edge.dest.task_ref for edge in self.edges})

    def send(self, ret):
        """Push the task's return value(s) down every output edge."""
        log.debug("Sending value {} from {}".format(ret, self.name))
        # We have multiple out-ports and we need to route return values to the
        # corresponding out-ports
        if isinstance(ret, tuple) and isinstance(
                self._sig.return_annotation, tuple):
            for edge in self.edges:
                edge.send(ret[edge.source.index])
        else:
            log.debug("Number of edges: {}".format(len(self.edges)))
            for edge in self.edges:
                edge.send(ret)

    def receive(self):
        """Collect the non-immediate inputs and run the task once all arrived."""
        is_one_sided_receive = False
        for inport in self.inputs.values():
            if not inport.is_immediate:
                is_one_sided_receive |= inport.is_one_sided_receive
                inport.receive()

        # If communication is one sided i.e: task is not actively waiting for
        # inputs we shouldn't block this thread since other in-ports needs to
        # run on this thread and push the rest of the inputs to the task
        if self._latch.value and is_one_sided_receive:
            return

        # Latch is triggered and task run when we get all the inputs
        # That will be the case if the in-ports are blocking at receive()
        # or communication is one-sided (in which case we make sure we get to
        # here only after getting all the inputs as per the conditional above).
        # If the in-ports are non blocking we wait on the `triggered` monitor.
        # The latch is tested while holding the monitor so that a
        # notification sent between the test and the wait cannot be lost.
        with self.triggered:
            while self._latch.value:
                self.triggered.wait()

        Backend.get_current_backend().run_task(self)

    def run(self):
        """Execute the runner, forward its result and mark the task done."""
        self.send(self._runner(**self._args))
        self.graph.mark_completed(self.id)

    def dump(self):
        """Print the task's name, port names and edges."""
        print("Task : {}".format(self.name))
        print("Inputs :")
        for name in self.inputs:
            print("{}".format(name))
        print("Output :")
        for name in self.outputs:
            print("{}".format(name))
        for edge in self.edges:
            edge.dump()
        print("------------")
class RWLock():
    """A Readers-Writer lock.

    Allows for multiple readers or one writer. Writers will not starve.

    Attributes:
        for_reading (RWLock.ReadLock):
            A lock-like object with appropriate `acquire`, `release`,
            `__enter__` and `__exit__` methods pointed to the *read methods
            of the RWLock. Chiefly for use with the `with` statement.
        for_writing (RWLock.WriteLock):
            A lock-like object with appropriate `acquire`, `release`,
            `__enter__` and `__exit__` methods pointed to the *write methods
            of the RWLock. Chiefly for use with the `with` statement.
    """

    class ReadLock():
        """Lock-like view that maps acquire/release to the read methods."""

        def __init__(self, rw):
            self._rw = rw
            self.acquire = rw.acquire_read
            self.release = rw.release_read

        def __enter__(self):
            self.acquire()

        def __exit__(self, exception_type, exception_value, traceback):
            self.release()

    class WriteLock():
        """Lock-like view that maps acquire/release to the write methods."""

        def __init__(self, rw):
            self._rw = rw
            self.acquire = rw.acquire_write
            self.release = rw.release_write

        def __enter__(self):
            self.acquire()

        def __exit__(self, exception_type, exception_value, traceback):
            self.release()

    def __init__(self):
        """Initialises the RWLock."""
        self._condition = Condition()
        # Both counters are only touched while holding the condition's lock,
        # hence no lock of their own.
        self._readers = Value(c_uint64, 0, lock=False)
        self._writers_waiting = Value(c_uint64, 0, lock=False)
        self.for_reading = self.ReadLock(self)
        self.for_writing = self.WriteLock(self)

    def acquire_read(self):
        """Acquire a read lock.

        Blocks if a thread has acquired the write lock or is waiting to
        acquire the write lock.
        """
        with self._condition:
            while self._writers_waiting.value:
                self._condition.wait()
            self._readers.value += 1

    def release_read(self):
        """Release a read lock."""
        with self._condition:
            self._readers.value -= 1
            if not self._readers.value:
                self._condition.notify_all()

    def acquire_write(self):
        """Acquire a write lock.

        Blocks until there are no acquired read or write locks. The
        condition's lock stays held until `release_write` is called.
        """
        self._condition.acquire()
        try:
            self._writers_waiting.value += 1
            try:
                while self._readers.value:
                    self._condition.wait()
            finally:
                self._writers_waiting.value -= 1
        except BaseException:
            # Do not keep the lock if waiting was interrupted.
            self._condition.release()
            raise

    def release_write(self):
        """Release a write lock."""
        # Wake readers that went to sleep while this writer was queued.
        # Nothing else notifies them once the writer stops waiting, so
        # without this they could block until some later read release.
        self._condition.notify_all()
        self._condition.release()
class Orchestrator:
    """
    Pulls batch requests off a submission queue and, for each one, queues its
    work items for EndpointManagers to steal, tracks what is in progress and
    what has finished, and writes run summaries.

    All queue / in-progress / results bookkeeping is guarded by
    `_accounting_lock` (re-entrant); `_file_queue_cond` is a condition
    variable built on that same lock.
    """

    def __init__(self,
                 submission_queue: multiprocessing.Queue,
                 status_provider: BatchStatusProvider,
                 config_file: str,
                 strict_config: bool,
                 log_folder: str,
                 cache_search_dirs: List[str],
                 log_event_que: LogEventQueue,
                 singleton_run_summary_path: Optional[str] = None):
        """
        Store the collaborators, create the bookkeeping state, register the
        config-file notifier and start the master and run-summary threads.

        :param submission_queue: queue from which batch requests are taken.
        :param status_provider: used to change batch status and store run summaries.
        :param config_file: path passed to load_configuration() and watched for changes.
        :param strict_config: passed through to load_configuration().
        :param log_folder: passed to EndpointManagers and work item creation.
        :param cache_search_dirs: passed to EndpointManagers and work item creation.
        :param log_event_que: logging queue handed to collaborators.
        :param singleton_run_summary_path: when set, run summaries are written to
                                           this single file instead of per batch.
        """
        self._submission_que: multiprocessing.Queue = submission_queue
        self._status_provider: BatchStatusProvider = status_provider
        self._config_file: str = config_file
        self._strict_config: bool = strict_config
        self._log_folder: str = log_folder
        self._cache_search_dirs = cache_search_dirs
        self._log_event_que = log_event_que
        self._singleton_run_summary_path = singleton_run_summary_path

        self._on_batch_id = -1
        self._on_batch_type: type = type(None)
        self._master_thread = Thread(target=self._master_thread_loop,
                                     name="OrchestratorMasterThread",
                                     args=(()),
                                     daemon=True)
        self._run_summary_thread_gate = Event()
        self._run_summary_thread = Thread(target=self._run_summary_loop,
                                          name="OrchestratorRunSummaryThread",
                                          args=(()),
                                          daemon=True)
        self.__debug_loop_thread = Thread(target=self.__debug_loop,
                                          name="OrchestratorDebugLoop",
                                          args=(()),
                                          daemon=True)
        # TODO(andwald): The following hypothetical thread dynamically sets RTF and Concurrency of EndpointManagers
        #                according to its own decoupled logic. This will be nice and pluggable since EndpointManagers
        #                already adhere to whatever the dynamic settings are for the Atomic Shared Variables of
        #                RTF and Concurrency, which is what this thread will manipulate.
        # self._perf_thread = Thread(target=self.perf_thread_loop, name="OrchestratorPerfThread", args=(self,), daemon=True)

        self._file_queue = Queue()
        # Tracked separately from the queue itself; always updated under
        # _accounting_lock together with the queue.
        self._file_queue_size = 0
        self._in_progress: Dict[str, WorkItemRequest] = {}  # WorkItemRequest.filepath -> WorkItemRequest
        self._in_progress_owner: Dict[str, EndpointManager] = {}  # WorkItemRequest.filepath -> EndpointManager
        self._work_results: Dict[str, WorkItemResult] = {}  # WorkItemRequest.filepath -> WorkItemResult
        self._batch_completion_evt = Event()
        self._accounting_lock = RLock()
        self._file_queue_cond = Condition(self._accounting_lock)
        self._run_summary_lock = Lock()
        self._misc_lock = Lock()
        self._summarizer: Optional[BatchRunSummarizer] = None
        self._stop_requested = False
        self._endpoint_managers: List[EndpointManager] = []
        self._endpoint_generation = 0
        self._old_managers = set()  # Set[str], contains names of now-inactive endpoint managers
        self._config_notifier: ThreadedNotifier = \
            notify_file_modified(self._config_file, self.hotswap_endpoint_managers, self._log_event_que)
        self._start_time = time.time()
        self._creator_pid = current_process().pid
        logger.info("Orchestrator created by process: {0}".format(
            self._creator_pid))

        self.__cnt_work_success_cb = 0
        self.__cnt_work_failure_cb = 0

        self._master_thread.start()
        self._run_summary_thread.start()
        # self.__debug_loop_thread.start()  # Enable to debug concurrency changes.

    def is_alive(self):
        """Return whether the master thread is still alive."""
        return self._master_thread.is_alive()

    def join(self):
        """Block until the master thread exits."""
        self._master_thread.join()

    def _run_summary_loop(self):
        """
        Body of the run-summary thread: while the gate event is set and a
        batch with a summarizer is active, periodically write the run summary.
        """
        while not self._stop_requested:
            # Prevent redundant updates when nothing can change.
            self._run_summary_thread_gate.wait()
            if self._stop_requested:
                return

            if self._on_batch_id > -1 and self._summarizer:
                try:
                    self.write_summary_information(write_run_summary=True,
                                                   log_conclusion_msg=False)
                # Don't ever let this thread die as it's too important.
                # Log and re-try. Repetitive failure loop will at least get logged.
                except Exception as e:
                    exception_details = traceback.format_exc()
                    logger.error(
                        "Orchestrator: run_summary_thread in run_summary_loop(): "
                        "Caught {0}, \nDetails: {1}".format(
                            type(e).__name__, exception_details))
            time.sleep(RUN_SUMMARY_LOOP_INTERVAL)

    def __debug_loop(self):
        """
        This is only intended to be used during development and debugging.
        """
        def _check_lock_acq(lock):
            """Return True when `lock` could not be taken without blocking."""
            # Positional flag on purpose: the keyword is `blocking` for
            # threading locks but `block` for multiprocessing locks.
            acquired = lock.acquire(False)
            if acquired:
                lock.release()
                return False
            # We weren't able to acquire, so it's taken
            return True

        # Loop forever. This is a daemonic thread and it will intentionally
        # only die when the process owning Orchestrator dies.
        last_cnt_work_success = 0
        while True:
            logger.debug("Stop requested: {0}".format(self._stop_requested))
            logger.debug("Batch que size: {0}".format(
                self._submission_que.qsize()))
            logger.debug("On batch id: {0}".format(self._on_batch_id))
            logger.debug("File queue size: {0}".format(self._file_queue_size))
            logger.debug("Num in progress: {0}".format(len(self._in_progress)))
            logger.debug("Orchestrator accounting lock taken: {0}".format(
                _check_lock_acq(self._accounting_lock)))
            logger.debug("Status provider accounting lock taken: {0}".format(
                _check_lock_acq(BatchStatusProvider.lock)))
            logger.debug(
                "Notify work success callback entry count: {0}".format(
                    self.__cnt_work_success_cb))
            logger.debug(
                "Work items completed since last debug print: {0}".format(
                    self.__cnt_work_success_cb - last_cnt_work_success))
            last_cnt_work_success = self.__cnt_work_success_cb
            logger.debug(
                "Notify work failure callback entry count: {0}".format(
                    self.__cnt_work_failure_cb))
            logger.debug("Run summary thread alive: {0}".format(
                self._run_summary_thread.is_alive()))
            logger.debug("Number of old endpoint managers: {0}".format(
                len(self._old_managers)))
            for epm in self._endpoint_managers:
                logger.debug("Endpoint manager: {0}".format(epm.name))
                logger.debug(" Current requests: {0}".format(
                    epm._current_requests))
                logger.debug(" Current requests lock taken: {0}".format(
                    _check_lock_acq(epm._current_requests_lock)))
                logger.debug(" Pool apply_async count: {0}".format(
                    epm._cnt_apply_async))
                logger.debug(" Pool callback count: {0}".format(
                    epm._cnt_pool_cb))
                logger.debug(" Pool callback returns count: {0}".format(
                    epm._cnt_pool_cb_rets))
                logger.debug(" Stop requested: {0}".format(
                    epm._stop_requested))
                logger.debug(" Now trying to steal work: {0}".format(
                    epm._in_steal_work_fn))
            logger.debug("Stack frames of all threads:")
            logger.debug("\n*** STACKTRACE - START ***\n")
            current_threads_stacktrace(use_logger=True)
            logger.debug("\n*** STACKTRACE - END ***\n")
            time.sleep(DEBUG_LOOP_INTERVAL)

    def write_summary_information(self,
                                  write_run_summary: bool = True,
                                  log_conclusion_msg: bool = False,
                                  allow_fail: bool = False):
        """
        Summarize individual file results, along with overall results,
        and write them to log and/or file. Also log a conclusion message if requested.
        :param write_run_summary: whether run summary (individual files + overall)
                                  should be written to file.
        :param log_conclusion_msg: whether a conclusion message should be logged
                                   which includes final stats and lists failures.
        :param allow_fail: log failure to write run summary but do not raise exception.
        """
        # To ensure history serialization, we wrap this method
        # in its own lock that nobody else contends with except for
        # the threads that invoke this.
        with self._run_summary_lock:
            # Take a consistent snapshot and then report on the snapshot
            # without holding back forward progress.
            with self._accounting_lock:
                snap_work_results: Dict[str, WorkItemResult] = copy.deepcopy(
                    self._work_results)
                snap_file_queue_size: int = self._file_queue_size
                snap_num_running: int = len(self._in_progress)
                snap_run_summarizer: BatchRunSummarizer = self._summarizer
                snap_batch_id: int = self._on_batch_id

            summary_json = {}
            # It's uncommon that a run summarizer wouldn't be available yet but this could
            # happen for example by signaling early termination to the Orchestrator.
            if snap_run_summarizer:
                summary_json = snap_run_summarizer.run_summary(
                    snap_work_results, snap_file_queue_size, snap_num_running,
                    self._start_time, len(self._endpoint_managers),
                    log_conclusion_msg)

            # Write the summary json file
            if write_run_summary:
                try:
                    if self._singleton_run_summary_path:
                        logger.debug(
                            "Updating singleton run_summary: {0}".format(
                                self._singleton_run_summary_path))
                        write_json_file_atomic(
                            summary_json, self._singleton_run_summary_path)
                    else:
                        try:
                            self._status_provider.set_run_summary(
                                snap_batch_id, summary_json)
                        except BatchNotFoundException:
                            # This is benign and means we caught a rare race condition
                            # in which the batch directory is very recently deleted.
                            pass
                    # Minimal throttle on file writes. We are under _run_summary_lock.
                    time.sleep(3)
                except Exception as e:
                    logger.warning("Failed to write run_summary: {0}".format(
                        str(e)))
                    if not allow_fail:
                        raise

    def request_stop(self):
        """
        Arrange for conditions that will lead to a fast conclusion of any
        ongoing batch without finishing whatever is remaining or in progress
        in this batch if any. This will also cause EndpointManagers to shut down.
        Orchestrator's join() is guaranteed to eventually return.
        """
        # Assume this might be called from a signal handler.
        # Instead of preventing child proc inheritance of signals,
        # we eliminate any leaky abstractions by permitting children
        # and those who spawn them to be completely blameless
        # for creating unexpected conditions.
        if current_process().pid != self._creator_pid:
            return

        with self._misc_lock:
            try:
                if self._config_notifier:
                    self._config_notifier.stop()
                    self._config_notifier = None
            except OSError as e:
                # ThreadedNotifier.stop() is not idempotent and gives
                # errno EBADF if it is already stopped.
                if e.errno != errno.EBADF:
                    raise

        # A couple facts about Python3 in case there is any concern
        # about being invoked by a signal handler.
        # 1 - Only the main thread of a process can handle
        #     signals, so now we know we are the main thread of the
        #     creator process in the signal case.
        # 2 - When running a signal handler, the main thread is
        #     is still subject to preemption at tick and the GIL
        #     can still be released for other threads. This means
        #     that picking up the lock here cannot create deadlock,
        #     unless the main thread itself was holding the lock before
        #     the signal. That's why we use ReentrantLock.
        with self._accounting_lock:
            self._stop_requested = True
            # Drain whatever was still queued; it will not be processed.
            while self._file_queue_size > 0:
                self._file_queue.get()
                self._file_queue_size -= 1
            # None on the submission queue wakes the master thread.
            self._submission_que.put(None)
            self._file_queue_cond.notify_all()
            self._batch_completion_evt.set()
            for m in self._endpoint_managers:
                m.request_stop()
            self._run_summary_thread_gate.set()

    def steal_work(self, manager: EndpointManager) -> WorkItemRequest:
        """
        Block until there is a work item for `manager`, or until the manager
        has been retired or a stop was requested.

        :param manager: the EndpointManager who is trying to steal work.
        :returns: the WorkItemRequest to process, or a SentinelWorkItemRequest
                  when the manager is in the old-managers set, a stop was
                  requested, or the dequeued item's language does not match
                  the manager's endpoint config.
        """
        sentinel = SentinelWorkItemRequest()

        # Classic consumer waiter pattern using condition variable.
        # `with` guarantees the lock is released even if something in the
        # loop raises (e.g. an endpoint config without a "language" key).
        with self._accounting_lock:
            while True:
                if manager.name in self._old_managers or self._stop_requested:
                    return sentinel

                if self._file_queue_size <= 0:
                    # Back to sleep because we got nothing.
                    self._file_queue_cond.wait()  # implicit self.accounting_lock.release()
                    continue

                work: WorkItemRequest = self._file_queue.get()
                self._file_queue_size -= 1

                # Eliminate this manager early if we detect a language mismatch.
                # It will be recreated on a new batch.
                if work.language and \
                        manager.endpoint_config["language"].lower() != work.language.lower():
                    self._file_queue.put(work)  # back on queue for someone qualified
                    self._file_queue_size += 1
                    self._file_queue_cond.notify()
                    return sentinel

                # Got some work to do!
                self._in_progress[work.filepath] = work
                self._in_progress_owner[work.filepath] = manager
                return work

    def _merge_results(self, filepath: str, result: WorkItemResult):
        """
        Record `result` as the latest result for `filepath`, adding the
        attempts of any previously recorded result onto it.
        """
        if filepath not in self._work_results:
            self._work_results[filepath] = result
        else:
            prev_attempts = self._work_results[filepath].attempts
            result.attempts += prev_attempts
            self._work_results[filepath] = result

    def notify_work_success(self, filepath: str, manager: EndpointManager,
                            result: WorkItemResult):
        """
        Callback for an EndpointManager reporting a successful work item:
        drop it from in-progress, record the result and signal batch
        completion if nothing is queued or in progress any more.
        """
        with self._accounting_lock:
            self.__cnt_work_success_cb += 1
            if manager.name in self._old_managers:
                # The AudioFileWork item would already be back in pending
                # or running by someone else or finished. Covers an uncommon race.
                return
            if self._stop_requested:
                # It's too late for updating batch status and we're about to die.
                return
            del self._in_progress[filepath]
            del self._in_progress_owner[filepath]
            self._merge_results(filepath, result)

            # Did we just finish the batch?
            if self._file_queue_size == 0 and len(self._in_progress) == 0:
                self._batch_completion_evt.set()

    def notify_work_failure(self, filepath: str, manager: EndpointManager,
                            result: WorkItemResult):
        """
        Callback for an EndpointManager reporting a failed work item: record
        the result, re-queue the item if it is retriable and under the retry
        limit, and signal batch completion if nothing is left.
        """
        with self._accounting_lock:
            self.__cnt_work_failure_cb += 1
            if manager.name in self._old_managers:
                # The WorkItemResult would already be back in pending
                # or running by someone else or finished. Covers an uncommon race.
                return
            if self._stop_requested:
                # It's too late for updating batch status and we're about to die.
                return
            self._merge_results(filepath, result)

            # Do we give it another chance?
            # Check retry-ability and num retries burned already.
            if result.can_retry and \
                    self._work_results[filepath].attempts - 1 < ORCHESTRATOR_SCOPE_MAX_RETRIES:
                self._log_event_que.debug(
                    "Placed work item {0} back into queue since retriable.".
                    format(filepath))
                self._file_queue.put(self._in_progress[filepath])
                self._file_queue_size += 1
                self._file_queue_cond.notify()
            # Else no more retries.

            # Either way the item is no longer in progress.
            del self._in_progress[filepath]
            del self._in_progress_owner[filepath]

            # Did we just finish the batch? E.g. finally gave up on this work
            # item and that so happens to be the last in the batch.
            if self._file_queue_size == 0 and len(self._in_progress) == 0:
                self._batch_completion_evt.set()

    def hotswap_endpoint_managers(self):
        """
        Reload the configuration and replace the EndpointManagers whose
        endpoint was added, changed, removed or stopped. Work owned by retired
        managers goes back on the file queue. Unchanged managers are kept.
        """
        config_data = load_configuration(self._config_file,
                                         self._strict_config)

        with self._accounting_lock:
            if self._stop_requested:
                return

            # Get the unique generation of these endpoint managers, which
            # is useful for both debugging and logging.
            gen = self._endpoint_generation
            self._endpoint_generation += 1

            # Get an EndpointStatusChecker for the type of the
            # BatchRequest that is currently being processed.
            ep_status_checker: EndpointStatusChecker
            if isinstance(None, self._on_batch_type):
                # _on_batch_type is still NoneType: no batch seen yet.
                ep_status_checker = UnknownEndpointStatusChecker(
                    self._log_event_que)
            else:
                ep_status_checker = self._on_batch_type.get_endpoint_status_checker(
                    self._log_event_que)

            try:
                # Determine EndpointManagers that need to be deleted (modified, new,
                # or no longer exist). Do not touch EndpointManagers that have not changed.
                new_em_objs: List[EndpointManager] = []
                # Start by assuming every EndpointManager needs to be deleted.
                deleted_managers: Dict[str, EndpointManager] = \
                    {em.endpoint_name: em for em in self._endpoint_managers}

                for endpoint_name, endpoint_config in config_data.items():
                    # If an existing endpoint is totally preserved in the new config, don't delete it.
                    # Also require that the endpoint's manager is not terminally stopped, otherwise we need
                    # a new instance of it anyway.
                    if endpoint_name in deleted_managers and \
                            endpoint_config == deleted_managers[endpoint_name].endpoint_config and \
                            not deleted_managers[endpoint_name]._stop_requested:  # noqa
                        # Don't delete this EndpointManager and don't make a new one.
                        del deleted_managers[endpoint_name]
                        continue

                    new_em_objs.append(
                        EndpointManager(
                            "HotswapGen{0}_{1}".format(str(gen), endpoint_name),
                            endpoint_name,
                            endpoint_config,
                            self._log_folder,
                            self._log_event_que,
                            self._cache_search_dirs,
                            # on EndpointManager has capacity to steal work
                            self.steal_work,
                            # on EndpointManager reports success
                            self.notify_work_success,
                            # on EndpointManager reports failure
                            self.notify_work_failure,
                            ep_status_checker,
                        ))
            # Validation of the config could fail or invalid yaml may have been given, etc.
            # We catch anything so that we may permit another attempt later with a proper config file.
            # We report it in the logs and somewhere else we will die if no forward progress for too long.
            except Exception as e:
                exception_details = traceback.format_exc()
                logger.error(
                    "Caught Exception '{0}' reading config. Details: {1}\n{2}".
                    format(type(e).__name__, str(e), exception_details))
                # Don't proceed to stop the old EndpointManagers because they're all we've got to go on.
                return

            if self._stop_requested:
                return

            # Also swap the EndpointManagers under lock in case of race.
            # First stop the old EndpointManagers to be deleted.
            for m in self._endpoint_managers:
                if m.endpoint_name in deleted_managers:
                    self._old_managers.add(m.name)
                    m.request_stop()

            # Un-assign work in progress for deleted EndpointManagers.
            # Now anything the old managers might still callback
            # would be rejected so we can safely move in progress back to queue.
            work_in_progress = {k: v for k, v in self._in_progress.items()}  # shallow copy
            work_in_progress_owner = {
                k: v for k, v in self._in_progress_owner.items()
            }  # shallow copy
            for filepath, work_item in self._in_progress.items():
                owner_endpoint_name = self._in_progress_owner[
                    filepath].endpoint_name
                # If the EndpointManager that owns this work item is being deleted,
                # free up the work item.
                if owner_endpoint_name in deleted_managers:
                    del work_in_progress[filepath]
                    del work_in_progress_owner[filepath]
                    self._file_queue.put(work_item)
                    self._file_queue_size += 1
            self._in_progress = work_in_progress
            self._in_progress_owner = work_in_progress_owner

            # We've potentially repopulated the file_queue.
            self._file_queue_cond.notify_all()

            # Start the new EndpointManagers.
            for m in new_em_objs:
                m.start()

            # Record the latest set of all EndpointManagers.
            self._endpoint_managers = \
                [em for em in self._endpoint_managers if em.endpoint_name not in deleted_managers] + \
                new_em_objs

            # Ensure that they are all using the correct type of EndpointStatusChecker
            # which depends on the subtype of BatchRequest we are currently processing.
            for m in self._endpoint_managers:
                m.set_endpoint_status_checker(ep_status_checker)

        logger.info(
            "Set new EndpointManagers after hot-swap: {0}".format(config_data))

    def _master_finalize(self):
        """
        Work to be done before Orchestrator's master thread exits.
        """
        # Log conclusion of run_summary information if at singleton level.
        if self._singleton_run_summary_path:
            self.write_summary_information(write_run_summary=False,
                                           log_conclusion_msg=True)

    def _master_thread_loop(self):
        """
        Body of the master thread: take one batch request at a time, queue its
        work items, wait for completion, then write the summary and mark the
        batch done. Exits once a stop has been requested.
        """
        # Keep doing batches until given a stop request.
        while True:
            # Starting a new batch.
            request: BatchRequest = self._submission_que.get()
            with self._accounting_lock:
                self._on_batch_type = type(request)

            # Recreate the endpoints on start of a new batch in case
            # the last batch disabled endpoints, e.g. for mismatched
            # language or other reasons.
            self.hotswap_endpoint_managers()

            with self._accounting_lock:
                if self._stop_requested:
                    self._master_finalize()
                    return

                # Starting a new batch.
                # Reset record keeping if it's not singleton run summary.
                if self._singleton_run_summary_path is None:
                    self._work_results = {}
                self._summarizer = request.get_batch_run_summarizer()
                logger.info("Orchestrator: Starting batch {0}".format(
                    request.batch_id))
                self._status_provider.change_status_enum(
                    request.batch_id, BatchStatusEnum.running)
                self._on_batch_id = request.batch_id
                self._batch_completion_evt.clear()
                self._run_summary_thread_gate.set()
                assert len(self._in_progress) == 0
                assert self._file_queue_size == 0
                for work in request.make_work_items(
                        self._status_provider.batch_base_path(
                            request.batch_id), self._cache_search_dirs,
                        self._log_folder):
                    self._file_queue.put(work)
                    self._file_queue_size += 1
                self._file_queue_cond.notify_all()

            # Wait for batch completion or early stop request. In both cases,
            # nothing is in progress and nothing is in queue when we're woken.
            self._batch_completion_evt.wait()
            logger.info("Orchestrator: Completed batch {0}".format(
                request.batch_id))

            # Report per-batch final run_summary.
            if self._singleton_run_summary_path is None:
                self.write_summary_information(write_run_summary=True,
                                               log_conclusion_msg=True,
                                               allow_fail=True)
            # Even with singleton run_summary, we should update run_summary file
            # now but not log conclusion.
            else:
                self.write_summary_information(write_run_summary=True,
                                               log_conclusion_msg=False,
                                               allow_fail=True)

            # Concatenate batch-level results to single file.
            if request.combine_results:
                write_single_output_json(
                    request.files,
                    self._status_provider.batch_base_path(request.batch_id))

            # Intentionally change status enum last so that above results committed first
            # for any event-driven observers.
            self._status_provider.change_status_enum(request.batch_id,
                                                     BatchStatusEnum.done)
            logger.info(
                "Orchestrator: Updated batch status to Done: {0}".format(
                    request.batch_id))

            # As another batch may not show up for a while (or never), stop the periodic
            # run summary thread since no new information to report.
            self._run_summary_thread_gate.clear()
class WaitableQueue(Queue):
    """Queue that counts its puts and lets callers wait for it to become
    empty or non-empty.

    `put_counter` is the total number of successful `put` calls, kept in a
    shared `Value`. `cond_empty` and `cond_notempty` are notified from `get`
    and `put` respectively.
    """

    class Vacuum(ThreadLoop):
        """ThreadLoop whose callback drains queue `q` into list `l`."""

        def __init__(self, q, l):
            def callback():
                # Wait briefly for data, then move everything currently
                # available into the list without blocking.
                q.wait_notempty(0.1)
                while True:
                    try:
                        val = q.get(False)
                        l.append(val)
                    except Empty:
                        break

            ThreadLoop.__init__(self, callback)

    def __init__(self, maxsize=0):
        # Created before Queue.__init__ so they exist as soon as the queue does.
        self.cond_empty = Condition()
        self.cond_notempty = Condition()
        self._put_counter = Value('i', 0)
        Queue.__init__(self, maxsize)

    def put(self, obj, block=True, timeout=None):
        """Put `obj` on the queue, bump the put counter and wake any
        `wait_notempty` waiters."""
        Queue.put(self, obj, block, timeout)
        # `value += 1` is a separate read and write; hold the Value's own
        # lock so concurrent puts cannot lose an increment.
        with self._put_counter.get_lock():
            self._put_counter.value += 1
        if self.qsize() != 0:
            with self.cond_notempty:
                self.cond_notempty.notify_all()

    @property
    def put_counter(self):
        """Number of items put on the queue so far."""
        return self._put_counter.value

    def get(self, block=True, timeout=None):
        """Get an item; wake `wait_empty` waiters if the queue is now empty."""
        ret = Queue.get(self, block, timeout)
        if self.qsize() == 0:
            with self.cond_empty:
                self.cond_empty.notify_all()
        return ret

    def wait_empty(self, timeout=None):
        """Wait for all items to be got"""
        with self.cond_empty:
            if self.qsize():
                self.cond_empty.wait(timeout)

    def wait_notempty(self, timeout=None):
        """Wait for at least one item to be put"""
        with self.cond_notempty:
            if self.qsize() == 0:
                self.cond_notempty.wait(timeout)
class Pipeline(object):
    """
    A collection that is similar to Python's Queue object, except
    it also tracks items that are currently sleeping or in progress.

    Items live in one of two deques (`queue` for normal items, `force` for
    items that bypass the working limit) until `next()` hands them out, at
    which point they move to the `working` set until `task_done()`.
    All state is guarded by `condition`, which wraps a re-entrant lock.
    """

    def __init__(self, max_working = 1):
        self.condition = Condition(RLock())
        self.max_working = max_working
        self.running = True
        self.paused = False
        self.queue = None
        self.force = None
        self.sleeping = None
        self.working = None
        self.item2id = None
        self.id2item = None # for performance reasons
        self.name2id = None
        self.id2name = None
        self.clear()

    def __len__(self):
        with self.condition:
            return len(self.id2item)

    def __contains__(self, item):
        with self.condition:
            return item in self.item2id

    def _register_item(self, name, item):
        """
        Assigns a fresh id to the item (and optionally a unique name) and
        returns the id. Raises AttributeError, without registering
        anything, if the name is already taken.
        """
        # Validate first so that a rejected item leaves no trace behind.
        if name is not None and name in self.name2id:
            msg = 'an item named %s is already queued' % repr(name)
            raise AttributeError(msg)

        uuid = uuid4().hex
        self.id2item[uuid] = item
        self.item2id[item] = uuid
        if name is not None:
            self.name2id[name] = uuid
            self.id2name[uuid] = name
        return uuid

    def get_from_name(self, name):
        """
        Returns the item with the given name, or None if no such item
        is known.
        """
        with self.condition:
            try:
                item_id = self.name2id[name]
            except KeyError:
                return None
            return self.id2item[item_id]

    def has_id(self, item_id):
        """
        Returns True if the queue contains an item with the given id.
        """
        return item_id in self.id2item

    def task_done(self, item):
        """
        Marks the given working item as finished and forgets its id and
        name. Waiters are notified in every case.
        """
        with self.condition:
            try:
                self.working.remove(item)
            except KeyError:
                # This may happen if we receive a notification from a
                # thread that was previously enqueued, but then the
                # workqueue was forcefully stopped without waiting for
                # child threads to complete.
                self.condition.notify_all()
                return
            item_id = self.item2id.pop(item)
            self.id2item.pop(item_id)
            try:
                name = self.id2name.pop(item_id)
            except KeyError:
                pass
            else:
                self.name2id.pop(name)
            self.condition.notify_all()

    def append(self, item, name = None):
        """
        Adds the given item to the end of the pipeline.
        Returns the id assigned to the item.
        """
        with self.condition:
            # Register before enqueuing: a duplicate name must not leave
            # the item sitting in the queue.
            uuid = self._register_item(name, item)
            self.queue.append(item)
            self.condition.notify_all()
            return uuid

    def appendleft(self, item, name = None, force = False):
        """
        Adds the given item to the front of the pipeline, or to the
        force queue if force is True. Returns the id assigned to the item.
        """
        with self.condition:
            uuid = self._register_item(name, item)
            if force:
                self.force.append(item)
            else:
                self.queue.appendleft(item)
            self.condition.notify_all()
            return uuid

    def prioritize(self, item, force = False):
        """
        Moves the item to the very left of the queue.
        """
        with self.condition:
            # If the job is already running (or about to be forced),
            # there is nothing to be done.
            if item in self.working or item in self.force:
                return
            self.queue.remove(item)
            if force:
                self.force.append(item)
            else:
                self.queue.appendleft(item)
            self.condition.notify_all()

    def clear(self):
        """
        Drops all queued, forced, sleeping and working items along with
        their ids and names.
        """
        with self.condition:
            self.queue = deque()
            self.force = deque()
            self.sleeping = set()
            self.working = set()
            self.item2id = dict()
            self.id2item = dict()
            self.name2id = dict()
            self.id2name = dict()
            self.condition.notify_all()

    def stop(self):
        """
        Force the next() method to return while in another thread.
        The return value of next() will be None.
        """
        with self.condition:
            self.running = False
            self.condition.notify_all()

    def start(self):
        with self.condition:
            self.running = True
            self.condition.notify_all()

    def pause(self):
        with self.condition:
            self.paused = True
            self.condition.notify_all()

    def unpause(self):
        with self.condition:
            self.paused = False
            self.condition.notify_all()

    def sleep(self, item):
        with self.condition:
            self.sleeping.add(item)
            self.condition.notify_all()

    def wake(self, item):
        with self.condition:
            # Checked under the lock so the test and the removal see the
            # same state.
            assert item in self.sleeping
            self.sleeping.remove(item)
            self.condition.notify_all()

    def wait_for_id(self, item_id):
        """
        Waits until no item with the given id is known any longer.
        """
        with self.condition:
            while self.has_id(item_id):
                self.condition.wait()

    def wait(self):
        """
        Waits for all currently running tasks to complete.
        """
        with self.condition:
            while self.working:
                self.condition.wait()

    def wait_all(self):
        """
        Waits for all queued and running tasks to complete.
        """
        with self.condition:
            while len(self) > 0:
                self.condition.wait()

    def with_lock(self, function, *args, **kwargs):
        """
        Calls function(self, *args, **kwargs) while holding the lock and
        returns its result.
        """
        with self.condition:
            return function(self, *args, **kwargs)

    def set_max_working(self, max_working):
        with self.condition:
            self.max_working = int(max_working)
            self.condition.notify_all()

    def get_max_working(self):
        return self.max_working

    def get_working(self):
        return list(self.working)

    def _popleft_sleeping(self):
        """
        Removes the leading run of sleeping items from the queue and
        returns them in their original order.
        """
        sleeping = []
        while True:
            try:
                node = self.queue[0]
            except IndexError:
                break
            if node not in self.sleeping:
                break
            sleeping.append(node)
            self.queue.popleft()
        return sleeping

    def _get_next(self, pop = True):
        """
        Returns the first non-sleeping item of the queue (None if there
        is none), removing it from the queue only if pop is True.
        """
        # We need to leave sleeping items in the queue because else we
        # would not know their original position after they wake up.
        # So we need to temporarily remove sleeping items from the top of
        # the queue here.
        sleeping = self._popleft_sleeping()

        # Get the first non-sleeping item from the queue.
        try:
            item = self.queue.popleft() if pop else self.queue[0]
        except IndexError:
            item = None

        # Re-insert sleeping items. extendleft() appends one by one to the
        # left, which reverses its argument, so feed it the reversed list
        # to restore the original order.
        self.queue.extendleft(reversed(sleeping))
        return item

    def try_next(self):
        """
        Like next(), but only returns the item that would be selected
        right now, without locking and without changing the queue.
        """
        with self.condition:
            try:
                return self.force[0]
            except IndexError:
                pass

            return self._get_next(False)

    def next(self):
        """
        Blocks until an item may be worked on, moves it into the working
        set and returns it. Returns None once the pipeline was stopped.
        """
        with self.condition:
            while self.running:
                if self.paused:
                    self.condition.wait()
                    continue

                # Wait until enough slots are available.
                if len(self.working) - \
                   len(self.sleeping) - \
                   len(self.force) >= self.max_working:
                    self.condition.wait()
                    continue

                # Forced items are returned regardless of how many tasks
                # are already working.
                try:
                    item = self.force.popleft()
                except IndexError:
                    pass
                else:
                    self.working.add(item)
                    return item

                # Return the first non-sleeping task.
                item = self._get_next()
                if item is None:
                    self.condition.wait()
                    continue
                self.working.add(item)
                return item
            return None
if line == '': continue idx = int(line[:line.find(' ')]) line = line[line.find(' ') + 1:] if idx != 1 or full_ex == '': full_ex = full_ex + line + '\n' continue # next line else: cnt_exs += 1 load(full_ex.strip()) if args['n'] is not None and cnt_exs >= args['n']: full_ex = '' break full_ex = line + '\n' # process last full_ex if out of new lines if full_ex != '': load(full_ex.strip()) while queued_exs.value - proced_exs.value > 0: with finished: finished.wait() for t in threads: t.terminate() fin = time.time() print('Time processing entities: {} s'.format(round(mid - beg))) print('Time processing examples: {} s'.format(round(fin - mid))) print('Total time: {} s'.format(round(fin - beg)))
class Cpu(object):
    """
    Executes the current instruction of the process control block (pcb)
    assigned to it, raising interruptions through the kernel's irq manager
    for kill and io instructions.

    A kernel must be supplied with set_kernel() before instructions can be
    fetched or interruptions sent.
    """

    def __init__(self):
        self.pcb = None
        self.__mutex = RLock()
        # Both conditions share the same re-entrant mutex.
        self.__pcb_not_set = Condition(self.__mutex)
        self.__mem_not_allocated = Condition(self.__mutex)
        self.__round_robin_policy_on = False
        # Set later by enable_round_robin() / set_kernel() / __fetch();
        # initialised here so the attributes always exist.
        self.__round_robin = None
        self.__kernel = None
        self.__current_instruction = None

    def enable_round_robin(self, round_robin_quantum):
        """Switch on_signal() over to a RoundRobin with the given quantum."""
        self.__round_robin_policy_on = True
        self.__round_robin = RoundRobin(round_robin_quantum)

    def pcb_not_set(self):
        """Return the condition that is notified when a pcb gets assigned."""
        return self.__pcb_not_set

    def set_kernel(self, kernel):
        self.__kernel = kernel

    def is_pcb_set(self):
        """Return True when a pcb is currently assigned."""
        # Identity test: must not depend on how the pcb implements ==/!=.
        return self.pcb is not None

    def set_current_pcb(self, pcb):
        """Assign the pcb and wake one thread waiting for it."""
        with self.__pcb_not_set:
            self.pcb = pcb
            self.__pcb_not_set.notify()

    def reset_pcb(self):
        self.pcb = None

    def get_current_pcb(self):
        return self.pcb

    def __get_mem_manager(self):
        return self.__kernel.get_mem_manager()

    def __get_irq_manager(self):
        return self.__kernel.get_irq_manager()

    def fetch_decode_and_execute(self):
        """Block until a pcb is assigned, then run one instruction cycle."""
        with self.__pcb_not_set:
            while not self.is_pcb_set():
                self.__pcb_not_set.wait()
        with self.__mutex:
            self.__fetch()
            self.__decode()
            self.__execute()

    def __fetch(self):
        """
        Load the current pcb's instruction from memory into
        __current_instruction, waiting on __mem_not_allocated while the
        memory manager returns None for it.
        """
        pcb = self.get_current_pcb()
        mem_manager = self.__get_mem_manager()
        address = mem_manager.current_instruction_address(pcb)
        with self.__mem_not_allocated:
            # Read once per iteration and keep what was read, so the value
            # that passed the check is the value that gets used.
            instruction = mem_manager.get(pcb, address)
            while instruction is None:
                self.__mem_not_allocated.wait()
                instruction = mem_manager.get(pcb, address)
            self.__current_instruction = instruction

    def __decode(self):
        self.__send_interruption_if_is_io()
        self.__send_interruption_if_is_kill()

    def __send_interruption_if_is_kill(self):
        if self.__current_instruction.is_kill_instruction():
            self.send_end()

    def __send_interruption_if_is_io(self):
        if self.__current_instruction.is_io_instruction():
            self.send_io()

    def __execute(self):
        self.__execute_if_is_cpu_instruction()

    def __execute_if_is_cpu_instruction(self):
        if self.__current_instruction.is_cpu_instruction():
            self.__current_instruction.run()
            self.get_current_pcb().increment_pc()

    def send_interruption(self, a_interruption):
        """Hand an Irq for the current pcb to the kernel's irq manager."""
        self.__get_irq_manager().handle(
            Irq(a_interruption, self.get_current_pcb()))

    def send_timeout(self):
        self.send_interruption(TIMEOUT_INTERRUPT)

    def send_end(self):
        self.send_interruption(KILL_INTERRUPT)

    def send_io(self):
        self.send_interruption(IO_INTERRUPT)

    def on_signal(self):
        """
        Run one cycle: delegated to the round robin when that policy is
        enabled, executed directly otherwise.
        """
        if self.__round_robin_policy_on:
            self.__round_robin.handle_action(self)
        else:
            self.fetch_decode_and_execute()
class TProcessPoolServer(TServer):
    """
    Server with a fixed size pool of worker subprocesses which service requests.
    Note that if you need shared state between the handlers - it's up to you!
    Written by Dvir Volk, doat.com
    """

    def __init__(self, *args):
        TServer.__init__(self, *args)
        self.numWorkers = 10
        self.workers = []
        # Shared flag read by every worker; cleared to make them exit.
        self.isRunning = Value('b', False)
        self.stopCondition = Condition()
        self.postForkCallback = None

    def setPostForkCallback(self, callback):
        """Register a callable that each worker runs once before serving.

        Raises TypeError if `callback` is not callable.
        """
        if not callable(callback):
            raise TypeError("This is not a callback!")
        self.postForkCallback = callback

    def setNumWorkers(self, num):
        """Set the number of worker processes that should be created"""
        self.numWorkers = num

    def workerProcess(self):
        """Accept clients on the shared server transport and serve them.

        Runs until `isRunning` is cleared; returns 0 on KeyboardInterrupt or
        SystemExit. Any other exception is logged and the loop continues.
        """
        if self.postForkCallback:
            self.postForkCallback()

        while self.isRunning.value:
            try:
                client = self.serverTransport.accept()
                self.serveClient(client)
            except (KeyboardInterrupt, SystemExit):
                return 0
            except Exception as x:
                logging.exception(x)

    def serveClient(self, client):
        """Process input/output from a client for as long as possible"""
        itrans = self.inputTransportFactory.getTransport(client)
        otrans = self.outputTransportFactory.getTransport(client)
        iprot = self.inputProtocolFactory.getProtocol(itrans)
        oprot = self.outputProtocolFactory.getProtocol(otrans)

        try:
            while True:
                self.processor.process(iprot, oprot)
        except TTransportException:
            # The transport failing is how this loop normally ends.
            pass
        except Exception as x:
            logging.exception(x)
        finally:
            # Close even when a KeyboardInterrupt/SystemExit passes through.
            itrans.close()
            otrans.close()

    def serve(self):
        """Start the worker processes and block until stop() is called"""
        # this is a shared state that can tell the workers to exit when set as false
        self.isRunning.value = True

        # first bind and listen to the port
        self.serverTransport.listen()

        # fork the children
        for _ in range(self.numWorkers):
            try:
                w = Process(target=self.workerProcess)
                w.daemon = True
                w.start()
                self.workers.append(w)
            except Exception as x:
                logging.exception(x)

        # Wait until stop() clears the flag. Re-checking the flag around
        # wait() tolerates spurious wake-ups and a stop() that ran just before
        # we started waiting; `with` guarantees the condition is released.
        with self.stopCondition:
            while self.isRunning.value:
                try:
                    self.stopCondition.wait()
                except (SystemExit, KeyboardInterrupt):
                    break
                except Exception as x:
                    logging.exception(x)

        self.isRunning.value = False

    def stop(self):
        """Clear the running flag and wake up serve()"""
        self.isRunning.value = False
        with self.stopCondition:
            self.stopCondition.notify()
class TModelPoolServer(TServer):
    '''
    A server runs a pool of multiple models to serve requests
    Written by CongVm
    '''

    def __init__(self, handler, listModelConfig, *args, logger=None, timeout=0.1, batchSize=1):
        TServer.__init__(self, *args)
        self.timeout = timeout
        self.batchSize = batchSize
        if logger is not None:
            self.logger = logger
        else:
            self.logger = logging.getLogger(__name__)
        self.listModelConfig = listModelConfig
        self.handler = handler
        self.workers = []
        # Shared flag read by every worker; cleared to make them exit.
        self.isRunning = Value('b', False)
        self.stopCondition = Condition()
        self.postForkCallback = None

    def setPostForkCallback(self, callback):
        """Register a callable that each worker runs once before serving.

        Raises TypeError if `callback` is not callable.
        """
        if not callable(callback):
            raise TypeError("This is not a callback!")
        self.postForkCallback = callback

    def setListModelConfig(self, listModelConfig):
        """Set the model configs; serve() starts one worker per entry"""
        self.listModelConfig = listModelConfig

    def workerProcess(self, kwargs):
        """Accept clients, collect them into a batch and serve the batch.

        `kwargs` is one entry of `listModelConfig` and is expanded into the
        handler constructor. A batch is served once it holds `batchSize`
        clients or `timeout` seconds have passed since the last batch; the
        check only runs after a client has been accepted.
        """
        # Init Processor here
        self.handlerInstance = self.handler(**kwargs)
        self.procInstance = self.processor(self.handlerInstance)

        if self.postForkCallback:
            self.postForkCallback()

        listClient = []
        t = time()
        while self.isRunning.value:
            try:
                client = self.serverTransport.accept()
                if not client:
                    continue
                listClient.append(client)
                if len(listClient) >= self.batchSize or time() - t >= self.timeout:
                    self.serveClient(self.procInstance, listClient)
                    listClient.clear()
                    t = time()
            except (KeyboardInterrupt, SystemExit):
                return 0
            except Exception as x:
                self.logger.exception(x)

    def parseClients(self, listClient):
        """Build transports and protocols for every client.

        Returns four parallel lists: output transports, input transports,
        input protocols and output protocols.
        """
        listOtrans = []
        listItrans = []
        listIprot = []
        listOprot = []
        for client in listClient:
            itrans = self.inputTransportFactory.getTransport(client)
            otrans = self.outputTransportFactory.getTransport(client)
            iprot = self.inputProtocolFactory.getProtocol(itrans)
            oprot = self.outputProtocolFactory.getProtocol(otrans)
            listOtrans.append(otrans)
            listItrans.append(itrans)
            listIprot.append(iprot)
            listOprot.append(oprot)
        return listOtrans, listItrans, listIprot, listOprot

    def serveClient(self, procInstance, listClient):
        """Process input/output from a batch of clients for as long as possible"""
        listOtrans, listItrans, listIprot, listOprot = self.parseClients(listClient)

        try:
            while True:
                procInstance.process(listIprot, listOprot)
        except TTransportException:
            # The transport failing is how this loop normally ends.
            pass
        except Exception as x:
            self.logger.exception(x)
        finally:
            # Close even when a KeyboardInterrupt/SystemExit passes through.
            for itrans, otrans in zip(listItrans, listOtrans):
                itrans.close()
                otrans.close()

    def serve(self):
        """Start one worker per model config and block until stop() is called"""
        # this is a shared state that can tell the workers to exit when False
        self.isRunning.value = True

        # first bind and listen to the port
        self.serverTransport.listen()

        # fork the children
        for modelConfig in self.listModelConfig:
            try:
                w = Process(target=self.workerProcess, args=(modelConfig, ))
                w.daemon = True
                w.start()
                self.workers.append(w)
            except Exception as x:
                self.logger.exception(x)

        # Wait until stop() clears the flag. Re-checking the flag around
        # wait() tolerates spurious wake-ups and a stop() that ran just before
        # we started waiting; `with` guarantees the condition is released.
        with self.stopCondition:
            while self.isRunning.value:
                try:
                    self.stopCondition.wait()
                except (SystemExit, KeyboardInterrupt):
                    break
                except Exception as x:
                    self.logger.exception(x)

        self.isRunning.value = False

    def stop(self):
        """Clear the running flag and wake up serve()"""
        self.isRunning.value = False
        with self.stopCondition:
            self.stopCondition.notify()
class IOManager(object):
    """Funnels all output through a queue that one parent-side thread prints.

    The parent owns the output lock, the queue and the "status line cleared"
    condition; children receive them through `child_parameters` and
    `activate_as_child()`.
    """

    def __init__(self):
        self.capture_mode = False
        self.child_mode = False
        self.parent_mode = False

    def activate_as_child(self, output_lock, output_queue, status_line_cleared):
        """Attach to the synchronisation objects created by the parent."""
        self.parent_mode = False
        self.child_mode = True
        self.status_line_cleared = status_line_cleared
        self.output_lock = output_lock
        self.output_queue = output_queue

    def activate_as_parent(self, debug=False):
        """Create the shared objects and start the daemon printing thread."""
        assert not self.child_mode
        self.debug_mode = debug
        self.jobs = []
        self.output_lock = Lock()
        self.parent_mode = True
        self.output_queue = Queue()
        self.status_line_cleared = Condition()
        self.thread = Thread(target=self._print_thread)
        self.thread.daemon = True
        self.thread.start()

    def ask(self, question, default, get_input=input_function):
        """Ask a yes/no question until a valid answer is given.

        An empty answer selects `default`. Returns True for yes, False for no.
        """
        answers = _("[Y/n]") if default else _("[y/N]")
        question = question + " " + answers + " "
        with self.lock:
            while True:
                STDOUT_WRITER.write("\a")
                STDOUT_WRITER.write(question)
                STDOUT_WRITER.flush()

                answer = get_input()
                if answer.lower() in (_("y"), _("yes")) or (
                    not answer and default
                ):
                    return True
                elif answer.lower() in (_("n"), _("no")) or (
                    not answer and not default
                ):
                    return False
                STDOUT_WRITER.write(_("Please answer with 'y(es)' or 'n(o)'.\n"))

    @contextmanager
    def capture(self):
        """Record everything passed to _write() while the block runs.

        Yields the dict that accumulates the 'stdout' and 'stderr' text.
        """
        self.capture_mode = True
        self.captured_io = {
            'stderr': "",
            'stdout': "",
        }
        try:
            yield self.captured_io
        finally:
            # Stop capturing even if the with-body raised.
            self.capture_mode = False

    @property
    def child_parameters(self):
        """Arguments a child must pass to activate_as_child()."""
        return (self.output_lock, self.output_queue, self.status_line_cleared)

    def debug(self, msg):
        self.output_queue.put({'msg': 'LOG', 'log_type': 'DBG', 'text': msg})

    def job_add(self, msg):
        self.output_queue.put({'msg': 'LOG', 'log_type': 'JOB_ADD', 'text': msg})

    def job_del(self, msg):
        self.output_queue.put({'msg': 'LOG', 'log_type': 'JOB_DEL', 'text': msg})

    def stderr(self, msg):
        self.output_queue.put({'msg': 'LOG', 'log_type': 'ERR', 'text': msg})

    def stdout(self, msg):
        self.output_queue.put({'msg': 'LOG', 'log_type': 'OUT', 'text': msg})

    @contextmanager
    def job(self, job_text):
        """Show `job_text` as a status line for the duration of the block."""
        self.job_add(job_text)
        try:
            yield
        finally:
            # Remove the status line even if the with-body raised.
            self.job_del(job_text)

    @property
    @contextmanager
    def lock(self):
        """Hold exclusive access to the terminal for the duration of the block.

        Blocks until the printing thread has signalled that the status line
        was cleared.
        """
        # A condition may only be waited on while it is owned. Taking it
        # *before* the output lock means the printing thread cannot notify
        # until we are actually waiting, so the wake-up cannot be lost.
        with self.status_line_cleared:
            with self.output_lock:
                self.status_line_cleared.wait()
                yield

    def _print_thread(self):
        """Print queued messages until a QUIT message arrives (parent only)."""
        assert self.parent_mode
        while True:
            if self.output_lock.acquire(False):
                try:
                    msg = self.output_queue.get()
                    if msg['log_type'] == 'QUIT':
                        # The finally clause below still releases the lock.
                        break
                    if self.debug_mode and msg['log_type'] in ('OUT', 'DBG', 'ERR'):
                        msg['text'] = datetime.now().strftime("[%Y-%m-%d %H:%M:%S.%f] ") + msg['text']
                    if self.jobs and TTY:
                        self._write("\r\033[K")
                    if msg['log_type'] == 'OUT':
                        self._write(msg['text'] + "\n")
                    elif msg['log_type'] == 'ERR':
                        self._write(msg['text'] + "\n", err=True)
                    elif msg['log_type'] == 'DBG' and self.debug_mode:
                        self._write(msg['text'] + "\n")
                    elif msg['log_type'] == 'JOB_ADD' and TTY:
                        self.jobs.append(msg['text'])
                    elif msg['log_type'] == 'JOB_DEL' and TTY:
                        self.jobs.remove(msg['text'])
                    if self.jobs and TTY:
                        self._write("[status] " + self.jobs[0])
                finally:
                    self.output_lock.release()
            else:
                # someone else is holding the output lock
                # the process holding the lock should now be waiting for
                # us to remove any status lines present before it starts
                # printing
                if self.jobs and TTY:
                    self._write("\r\033[K")
                # notify() requires ownership of the condition.
                with self.status_line_cleared:
                    self.status_line_cleared.notify()
                # now we wait until the other process has finished and
                # released the output lock
                self.output_lock.acquire()
                self.output_lock.release()

    def shutdown(self):
        """Ask the printing thread to quit and wait for it (parent only)."""
        assert self.parent_mode
        self.output_queue.put({'msg': 'LOG', 'log_type': 'QUIT'})
        self.thread.join()

    def _write(self, msg, err=False):
        """Write to stdout/stderr and mirror the text when capturing."""
        write_to_stream(STDERR_WRITER if err else STDOUT_WRITER, msg)
        if self.capture_mode:
            self.captured_io['stderr' if err else 'stdout'] += msg
class IODeviceManager(Thread):
    """Thread that feeds queued PCBs to a single I/O device, one at a time.

    Both conditions are built on the same re-entrant mutex returned by
    get_mutex().
    """

    def __init__(self, a_device, a_kernel, std_in=StandardInput(), std_out=StandardOutput()):
        Thread.__init__(self)
        self.set_device(a_device)
        self.set_kernel(a_kernel)
        self.set_input(std_in)
        self.set_output(std_out)
        self.set_mutex(RLock())
        self.set_queue(SoQueue())
        shared_mutex = self.get_mutex()
        self.device_is_in_use = Condition(shared_mutex)
        self.the_queue_is_empty = Condition(shared_mutex)

    # --- plain accessors -------------------------------------------------

    def get_kernel(self):
        return self.kernel

    def set_kernel(self, a_kernel):
        self.kernel = a_kernel

    def set_input(self, a_input):
        self.std_in = a_input

    def get_input(self):
        return self.std_in

    def set_output(self, a_output):
        self.std_out = a_output

    def get_output(self):
        return self.std_out

    def get_mutex(self):
        return self.mutex

    def set_mutex(self, a_mutex):
        self.mutex = a_mutex

    def get_queue(self):
        return self.queue

    def set_queue(self, a_queue):
        self.queue = a_queue

    def set_device(self, a_device):
        """Store the device and register this manager on it."""
        self.device = a_device
        self.get_device().set_device_manager(self)

    def get_device(self):
        return self.device

    # --- device hand-off -------------------------------------------------

    def the_device_is_busy(self):
        """Return True while the device reports that it is busy."""
        with self.get_mutex():
            idle = self.get_device().is_not_busy()
            return not idle

    def send_to_device(self):
        """Wait for the device to be free, then give it the first queued PCB."""
        with self.device_is_in_use:
            while self.the_device_is_busy():
                self.device_is_in_use.wait()
            with self.get_mutex():
                device = self.get_device()
                device.set_pcb(self.get())
                self.get_device().process_pcb()

    def notify_that_the_device_is_not_in_use(self):
        """Wake a thread that is waiting for the device to become free."""
        with self.device_is_in_use:
            self.device_is_in_use.notify()

    # --- queue handling --------------------------------------------------

    def put(self, a_pcb):
        """Queue a PCB and wake the manager thread."""
        with self.the_queue_is_empty:
            with self.get_mutex():
                pending = self.get_queue()
                pending.add_pcb(a_pcb)
            self.the_queue_is_empty.notify()

    def get(self):
        """Return the first PCB of the queue."""
        with self.get_mutex():
            pending = self.get_queue()
            return pending.get_first()

    def queue_is_empty(self):
        return self.get_queue().is_empty()

    def send_io_end_interruption(self, a_pcb):
        """Raise an IO_END_INTERRUPT for the PCB on the kernel's IRQ manager."""
        irq_manager = self.get_kernel().get_irq_manager()
        irq_manager.handle(Irq(IO_END_INTERRUPT, a_pcb))

    def run(self):
        """Forever: sleep until a PCB is queued, then send it to the device."""
        while True:
            with self.the_queue_is_empty:
                while self.queue_is_empty():
                    self.the_queue_is_empty.wait()
                self.send_to_device()
# Pick the problems to work on: freshly generated training problems, or the
# original ones (optionally restricted to the requested sizes).
if args.train:
    if sizes:
        problems_to_solve = itertools.islice(
            itertools.imap(api.train, itertools.cycle(sizes)),
            args.train_amount)
    else:
        problems_to_solve = (api.train(6) for i in range(args.train_amount))
else:
    if sizes:
        problems_to_solve = filter(lambda p: p['size'] in sizes, original_problems)
    else:
        problems_to_solve = original_problems

cond = Condition()
for problem in problems_to_solve:
    slave = Process(target=submitter.submit_in_sandbox, args=(problem, True, cond))
    # The condition is taken *before* the worker starts so its notification
    # cannot be missed; `with` releases it even if start() or wait() raises.
    with cond:
        slave.start()
        print("MASTER: waiting for process to exhaust its variants.")
        # Move on to the next problem after at most 300 seconds.
        cond.wait(300)
    if slave.is_alive():
        print("MASTER: worker tried hard but no success so far. Letting him stay for a while.")

# Keep joining until every worker that was left running has finished.
while active_children():
    children = active_children()
    print("PROCESSES LEFT: %s" % (children,))
    for c in children:
        c.join(timeout=60)
class RenderProcess:
    """
    Wraps a multiprocessing.Process for rendering. Assumes there
    is one MjSim per process.
    """

    def __init__(self, device_id, setup_sim, update_sim, output_var_shape):
        """
        Args:
        - device_id (int): GPU device to use for rendering (0-indexed)
        - setup_sim (callback): callback that is given a device_id and
            returns a MjSim. It is responsible for making MjSim render
            to given device.
        - update_sim (callback): callback given a sim and device_id, and
            should return a numpy array of shape `output_var_shape`.
        - output_var_shape (tuple): shape of the synchronized output
            array from `update_sim`.
        """
        self.device_id = device_id
        self.setup_sim = setup_sim
        self.update_sim = update_sim

        # Create a synchronized output variable (numpy array)
        self._shared_output_var = Array(ctypes.c_double, int(np.prod(output_var_shape)))
        self._output_var = np.frombuffer(self._shared_output_var.get_obj())

        # Variables used to communicate with the process. All three flags are
        # only changed while holding self._cv.
        self._cv = Condition()
        self._ready = Value('b', 0)      # worker is idle, output is complete
        self._start = Value('b', 0)      # an update was requested
        self._terminate = Value('b', 0)  # the worker should exit

        # Start the actual process
        self._process = Process(target=self._run)
        self._process.start()

    def wait(self):
        """ Wait for process to be ready for another update call. """
        with self._cv:
            # Ready means: no request is pending and the worker is idle.
            # Looping on the predicate tolerates spurious wake-ups and
            # notifications that were meant for the worker.
            while self._start.value or not self._ready.value:
                self._cv.wait()

    def read(self, copy=False):
        """ Reads the output variable. Returns a copy if copy=True. """
        if copy:
            with self._shared_output_var.get_lock():
                return np.copy(self._output_var)
        else:
            return self._output_var

    def update(self):
        """ Calls update_sim asynchronously. """
        with self._cv:
            self._start.value = 1
            # notify_all: a thread blocked in wait() may share the condition
            # with the worker, and the worker must not miss the request.
            self._cv.notify_all()

    def stop(self):
        """ Tells process to stop and waits for it to terminate. """
        with self._cv:
            self._terminate.value = 1
            self._cv.notify_all()
        self._process.join()

    def _run(self):
        """ Worker loop: announce readiness, wait for a request, update. """
        sim = self.setup_sim(self.device_id)
        # Rebuild the view in the worker so that it is backed by the shared
        # buffer whichever start method created this process.
        output_var = np.frombuffer(self._shared_output_var.get_obj())

        while True:
            with self._cv:
                self._ready.value = 1
                self._cv.notify_all()

            with self._cv:
                while not self._start.value and not self._terminate.value:
                    self._cv.wait()
                if self._terminate.value:
                    break
                self._start.value = 0
                # Busy from here on, so wait() blocks until the result of
                # this update has been written.
                self._ready.value = 0

            # Run the update and assign output variable
            with self._shared_output_var.get_lock():
                output_var[:] = self.update_sim(sim, self.device_id).ravel()
class TProcessPoolServer(TServer):
    """Server with a fixed size pool of worker subprocesses to service requests

    Note that if you need shared state between the handlers - it's up to you!
    Written by Dvir Volk, doat.com
    """

    def __init__(self, *args):
        TServer.__init__(self, *args)
        self.numWorkers = 10
        self.workers = []
        # Shared flag read by every worker; cleared to make them exit.
        self.isRunning = Value('b', False)
        self.stopCondition = Condition()
        self.postForkCallback = None

    def setPostForkCallback(self, callback):
        """Register a callable that each worker runs once before serving.

        Raises TypeError if `callback` is not callable.
        """
        if not callable(callback):
            raise TypeError("This is not a callback!")
        self.postForkCallback = callback

    def setNumWorkers(self, num):
        """Set the number of worker processes that should be created"""
        self.numWorkers = num

    def workerProcess(self):
        """Accept clients on the shared server transport and serve them.

        Runs until `isRunning` is cleared; returns 0 on KeyboardInterrupt or
        SystemExit. Any other exception is logged and the loop continues.
        """
        if self.postForkCallback:
            self.postForkCallback()

        while self.isRunning.value:
            try:
                client = self.serverTransport.accept()
                if not client:
                    continue
                self.serveClient(client)
            except (KeyboardInterrupt, SystemExit):
                return 0
            except Exception as x:
                logger.exception(x)

    def serveClient(self, client):
        """Process input/output from a client for as long as possible"""
        itrans = self.inputTransportFactory.getTransport(client)
        otrans = self.outputTransportFactory.getTransport(client)
        iprot = self.inputProtocolFactory.getProtocol(itrans)
        oprot = self.outputProtocolFactory.getProtocol(otrans)

        try:
            while True:
                self.processor.process(iprot, oprot)
        except TTransportException:
            # The transport failing is how this loop normally ends.
            pass
        except Exception as x:
            logger.exception(x)
        finally:
            # Close even when a KeyboardInterrupt/SystemExit passes through.
            itrans.close()
            otrans.close()

    def serve(self):
        """Start the worker processes and block until stop() is called"""
        # this is a shared state that can tell the workers to exit when False
        self.isRunning.value = True

        # first bind and listen to the port
        self.serverTransport.listen()

        # fork the children
        for _ in range(self.numWorkers):
            try:
                w = Process(target=self.workerProcess)
                w.daemon = True
                w.start()
                self.workers.append(w)
            except Exception as x:
                logger.exception(x)

        # Wait until stop() clears the flag. Re-checking the flag around
        # wait() tolerates spurious wake-ups and a stop() that ran just before
        # we started waiting; `with` guarantees the condition is released.
        with self.stopCondition:
            while self.isRunning.value:
                try:
                    self.stopCondition.wait()
                except (SystemExit, KeyboardInterrupt):
                    break
                except Exception as x:
                    logger.exception(x)

        self.isRunning.value = False

    def stop(self):
        """Clear the running flag and wake up serve()"""
        self.isRunning.value = False
        with self.stopCondition:
            self.stopCondition.notify()
p = UserInteraction(taskid=taskids[i], date=date, dataset=dataset, users=users, queue=queue, finishSig=finishSig, scoretag=scoreTag) p.start() pool_processes.append(p) i += 1 else: finishSig.acquire() #print("pool full") if queue.empty() == True: finishSig.wait() rmPs = [] while queue.empty() == False: result = queue.get() user_m = result[1].toarray() taskid = result[0] data = {} data["users"] = users data["data"] = user_m ranks, names = rankOnDIG(data) data = {"users": names, "ranks": ranks} #print(ranks) #print(names) #exit(10) #print("fetched",taskid)
class SharedStorage(object):
    """
    Storage synchronized between processes.
    """

    class StorageType(Enum):
        LIST = "list"  # SharedStorageList - wrapped manager.list()
        DICT = "DICT"  # SharedStorageDict - wrapped manager.dict()
        # Plain manager.dict(). More suitable when there is no need to work
        # with a large volume of data (over 2GB).
        DICT_SIMPLE = "DICT_SIMPLE"

    def __init__(self, storageType, manager=None):
        """
        Initializes the storage.

        :type storageType: StorageType
        :param storageType: Kind of storage. All supported kinds are listed in StorageType.
        :param manager: Optional. Forces the use of a different multiprocessing.Manager.
        :raises TypeError: If storageType is not a StorageType member.
        :raises ValueError: If the storage kind is not handled.
        """
        if manager is None:
            manager = Manager()

        # Type checking
        if not isinstance(storageType, self.StorageType):
            raise TypeError('storageType musí být instancí StorageType')

        # The data is kept here.
        if storageType == self.StorageType.LIST:
            self._storage = SharedList(manager)
        elif storageType == self.StorageType.DICT:
            self._storage = SharedDict(manager)
        elif storageType == self.StorageType.DICT_SIMPLE:
            self._storage = manager.dict()
        else:
            raise ValueError('Neznámý druh uložiště (storageType).')

        self.__usedManager = manager

        # Shared lock for synchronizing the processes.
        self.__sharedLock = Lock()

        # Number of stored classifiers.
        self._numOfData = Value(c_ulong, 0)

        self.__waitForChange = Condition()

        # Plain attribute: tracks whether this object currently holds the
        # shared lock through acquire().
        self.acquiredStorage = False

    def __len__(self):
        """
        Number of stored items.

        :return: Count of all stored data.
        :rtype: int
        """
        return self._numOfData.value

    def _notifyChange(self):
        """
        Announces a change to anyone who is waiting for one.
        """
        with self.__waitForChange:
            self.__waitForChange.notify_all()

    def waitForChange(self, timeout=None):
        """
        Puts the process to sleep until a change happens.
        If the process had acquired the storage, it is released for the
        duration of the sleep and acquired again after waking up.

        :param timeout: Maximum number of seconds to wait.
            May be None, then it waits until the event arrives.
        """
        wasAcquiredBeforeSleeping = self.acquiredStorage

        with self.__waitForChange:
            # Give the storage up only once the condition is owned, so a
            # change made right after the release cannot be notified before
            # this process is actually waiting.
            if wasAcquiredBeforeSleeping:
                self.release()
            self.__waitForChange.wait(timeout)

        # Re-acquire outside the condition so that no lock is waited for
        # while the condition is held.
        if wasAcquiredBeforeSleeping:
            self.acquire()

    def acquire(self):
        """
        Acquires the storage for the caller. Other processes have to wait.
        """
        self.__sharedLock.acquire()
        self.acquiredStorage = True

    def release(self):
        """
        Releases the storage for other processes.
        """
        self.__sharedLock.release()
        self.acquiredStorage = False

    def _safeAcquire(self):
        """
        Acquires the storage at the moment its state is about to change.
        Does nothing when the storage is already held through acquire().
        """
        if not self.acquiredStorage:
            self.__sharedLock.acquire()

    def _safeRelease(self):
        """
        Releases access to the storage.
        Does nothing when the storage is held through acquire().
        """
        if not self.acquiredStorage:
            self.__sharedLock.release()
class PolomaBuff:
    """Buffers rows and inserts them into `table` through worker processes.

    append() collects items locally; once `batchsize` items are collected the
    batch is queued. Each worker process takes batches off the queue and
    inserts every batch on its own thread, with at most `maxconn` connections
    open at once. Extra positional/keyword arguments are passed to PolomaConn.
    """

    def __init__(self, table, workers=cpu_count(), maxconn=cpu_count(), maxbuff=50000, batchsize=5000, *args, **kwargs):
        self.table = table
        self.maxbuff = maxbuff
        self.maxconn = maxconn
        self.batchsize = batchsize
        self._args = args
        self._kwargs = kwargs
        self._queue = Queue()
        self._buffer_notifier = Condition()  # signalled when a batch leaves the queue
        self._conn_notifier = Condition()    # signalled when a connection is closed
        self._conns = Value('i', 0)          # connections currently open
        self._buffsize = Value('i', 0)       # items queued but not yet picked up
        self._sent = Value('i', 0)           # items inserted so far
        self._workers = 0
        self._buffer = []
        self._procs = []
        self._spawn(workers)
        self._progress()

    def _progress(self):
        """Print a one-line status that overwrites itself."""
        print('\tSENT:', self._sent.value, 'BUFFER:', self._buffsize.value, 'CONNS:', self._conns.value,
              'WORKERS:', self._workers, 'CPU:', cpu_percent(), 'MEM:', virtual_memory().percent, ' ' * 10, end='\r')

    def _spawn(self, workers):
        """Start `workers` daemon processes running _worker()."""
        for _ in range(workers):
            values = (self._sent, self._buffsize, self._conns, self._queue, self._buffer_notifier, self._conn_notifier)
            p = Process(target=self._worker, args=values)
            p.daemon = True
            self._procs.append(p)
            p.start()
            self._workers += 1

    def _worker(self, _sent, _buffsize, _conns, _queue, _buffer_notifier, _conn_notifier):
        """Worker loop: take batches off the queue until 'KILL' arrives."""

        def _acquire_conn_slot():
            # Check the limit and reserve the slot in one critical section so
            # two senders cannot both pass the check for the last free slot.
            with _conn_notifier:
                while _conns.value >= self.maxconn:
                    _conn_notifier.wait()
                with _conns.get_lock():
                    _conns.value += 1

        def _release_conn_slot():
            with _conn_notifier:
                with _conns.get_lock():
                    _conns.value -= 1
                _conn_notifier.notify()

        def _send(_conn_notifier, _conns, _sent, _buffer):
            # The slot was reserved by the caller; always hand it back, even
            # when connecting or inserting fails.
            try:
                c = PolomaConn(*self._args, **self._kwargs)
                try:
                    is_nested(_buffer)
                    c.insert(self.table, _buffer)
                    c.commit()
                finally:
                    c.close()
                # `+=` on a shared Value is a read followed by a write, so it
                # needs the Value's lock to be atomic.
                with _sent.get_lock():
                    _sent.value += len(_buffer)
            finally:
                _release_conn_slot()

        def _notify(notifier):
            with notifier:
                notifier.notify()

        while True:
            _buffer = _queue.get()
            # Test for the sentinel first: it was never counted in the buffer
            # size, so its length must not be subtracted from it.
            if _buffer == 'KILL':
                break
            with _buffsize.get_lock():
                _buffsize.value -= len(_buffer)
            _notify(_buffer_notifier)
            _acquire_conn_slot()
            Thread(target=_send, args=(_conn_notifier, _conns, _sent, _buffer)).start()

    def _wait_if_buff_full(self):
        """Block while `maxbuff` or more items are waiting in the queue."""
        with self._buffer_notifier:
            while self._buffsize.value >= self.maxbuff:
                self._buffer_notifier.wait()

    def append(self, item, batch=False):
        """Add one item, or a list of items when `batch` is true.

        The local buffer is queued once it holds at least `batchsize` items.
        """
        if batch:
            self._buffer += item
        else:
            self._buffer.append(item)
        self._wait_if_buff_full()
        if len(self._buffer) >= self.batchsize:
            with self._buffsize.get_lock():
                self._buffsize.value += len(self._buffer)
            self._queue.put(self._buffer)
            self._progress()
            self._buffer = []

    def kill(self):
        """Send one 'KILL' per worker and wait for all workers to exit.

        NOTE(review): items still in the local buffer (fewer than `batchsize`)
        are not queued here -- confirm whether they should be flushed first.
        """
        for _ in range(self._workers):
            self._queue.put('KILL')
        for p in self._procs:
            p.join()
        print()
class CountBucket(Query):
    """
    Class for registering callbacks on counts of packets sent to
    the controller.
    """

    def __init__(self):
        super(CountBucket, self).__init__()
        self.matches = set()
        self.runtime_stats_query_fun = None
        self.outstanding_switches = []
        self.packet_count = 0
        self.byte_count = 0
        self.packet_count_persistent = 0
        self.byte_count_persistent = 0
        # Guards `in_update`; waiters sleep on it while an update is running.
        self.in_update_cv = Condition()
        self.in_update = False
        self._classifier = self.generate_classifier()

    def __repr__(self):
        return "CountBucket"

    def eval(self, pkt):
        """
        evaluate this policy on a single packet

        :param pkt: the packet on which to be evaluated
        :type pkt: Packet
        :rtype: set Packet
        """
        return set()

    def generate_classifier(self):
        return Classifier([Rule(identity, {self})])

    def apply(self):
        """Fold every buffered packet into the persistent counts, then
        empty the buffer."""
        with self.bucket_lock:
            for pkt in self.bucket:
                self.packet_count_persistent += 1
                self.byte_count_persistent += pkt['header_len'] + pkt['payload_len']
            self.bucket.clear()

    def start_update(self):
        """
        Use a condition variable to mediate access to bucket state as it is
        being updated.

        Why condition variables and not locks? The main reason is that the state
        update doesn't happen in just a single function call here, since the
        runtime processes the classifier rule by rule and buckets may be touched
        in arbitrary order depending on the policy. They're not all updated in a
        single function call. In that case,

        (1) Holding locks *across* function calls seems dangerous and
        non-modular (in my opinion), since we need to be aware of this across a
        large function, and acquiring locks in different orders at different
        points in the code can result in tricky deadlocks (there is another lock
        involved in protecting bucket updates in runtime).

        (2) The "with" semantics in python is clean, and splitting that into
        lock.acquire() and lock.release() calls results in possibly replicated
        failure handling code that is boilerplate.
        """
        with self.in_update_cv:
            self.in_update = True
            self.matches = set()
            self.runtime_stats_query_fun = None
            self.outstanding_switches = []

    def finish_update(self):
        """Mark the update as finished and wake every waiting thread."""
        with self.in_update_cv:
            self.in_update = False
            self.in_update_cv.notify_all()

    def add_match(self, m):
        """
        Add a match m to list of classifier rules to be queried for
        counts.
        """
        # set.add() already ignores values that are present.
        self.matches.add(m)

    def add_pull_stats(self, fun):
        """
        Point to function that issues stats queries in the
        runtime. Only the first function registered since the last
        start_update() is kept.
        """
        if not self.runtime_stats_query_fun:
            self.runtime_stats_query_fun = fun

    def pull_stats(self):
        """Issue stats queries from the runtime"""
        queries_issued = False
        with self.in_update_cv:
            while self.in_update:  # ensure buckets not updated concurrently
                self.in_update_cv.wait()
            if self.runtime_stats_query_fun is not None:
                self.outstanding_switches = []
                queries_issued = True
                self.runtime_stats_query_fun()
        # If no queries were issued, then no matches, so just call userland
        # registered callback routines
        if not queries_issued:
            self.packet_count = self.packet_count_persistent
            self.byte_count = self.byte_count_persistent
            for f in self.callbacks:
                f([self.packet_count, self.byte_count])

    def add_outstanding_switch_query(self, switch):
        self.outstanding_switches.append(switch)

    def handle_flow_stats_reply(self, switch, flow_stats):
        """
        Given a flow_stats_reply from switch s, collect only those
        counts which are relevant to this bucket.

        Very simple processing for now: just collect all packet and
        byte counts from rules that have a match that is in the set of
        matches this bucket is interested in.
        """
        def stat_in_bucket(flow_stat, s):
            # Read the entry that was passed in rather than a loop variable
            # of the enclosing function.
            table_match = match(flow_stat['match']).intersect(match(switch=s))
            network_match = match(flow_stat['match'])
            if table_match in self.matches or network_match in self.matches:
                return True
            return False

        with self.in_update_cv:
            while self.in_update:
                self.in_update_cv.wait()
            # NOTE(review): the counts restart from the persistent values on
            # every reply -- confirm this is intended when several switches
            # are outstanding.
            self.packet_count = self.packet_count_persistent
            self.byte_count = self.byte_count_persistent
            if switch in self.outstanding_switches:
                for f in flow_stats:
                    if 'match' in f:
                        if stat_in_bucket(f, switch):
                            self.packet_count += f['packet_count']
                            self.byte_count += f['byte_count']
                self.outstanding_switches.remove(switch)
        # If have all necessary data, call user-land registered callbacks
        if not self.outstanding_switches:
            for f in self.callbacks:
                f([self.packet_count, self.byte_count])

    def __eq__(self, other):
        # TODO: if buckets eventually have names, equality should
        # be on names.
        # NOTE(review): no matching __hash__ is defined here -- confirm how
        # buckets are expected to behave inside sets such as the one built in
        # generate_classifier().
        return isinstance(other, CountBucket)
class DBPipeline(object):
    """
    Like L{Exscript.workqueue.Pipeline}, but keeps all queued objects
    in a database, instead of using in-memory data structures.

    NOTE(review): in the code visible here the scheduling state (queue,
    force, sleeping, working, all) is held in memory, while __len__(),
    has_id() and get_from_name() query the job table. Confirm which of
    the two is meant to be authoritative.
    """

    def __init__(self, engine, max_working = 1):
        """
        Constructor.

        @type  engine: object
        @param engine: Passed to sa.MetaData(); presumably an sqlalchemy
                       engine.
        @type  max_working: int
        @param max_working: The slot limit that next() checks before
                            handing out another item.
        """
        # All state changes are guarded by (and announced through) this
        # condition; the RLock allows nested acquisition, e.g. when
        # prioritize() calls appendleft().
        self.condition     = Condition(RLock())
        self.engine        = engine
        self.max_working   = max_working
        self.running       = False
        self.paused        = False
        self.metadata      = sa.MetaData(self.engine)
        self._table_prefix = 'exscript_pipeline_'
        self._table_map    = {}
        self.__update_table_names()
        self.clear()

    def __add_table(self, table):
        """
        Adds a new table to the internal table list.

        @type  table: Table
        @param table: An sqlalchemy table.
        """
        # The map is keyed by the table name with the prefix stripped.
        pfx = self._table_prefix
        self._table_map[table.name[len(pfx):]] = table

    def __update_table_names(self):
        """
        Adds all tables to the internal table list.
        """
        pfx = self._table_prefix
        self.__add_table(sa.Table(pfx + 'job', self.metadata,
            sa.Column('id',     sa.Integer,    primary_key = True),
            sa.Column('name',   sa.String(150), index = True),
            sa.Column('status', sa.String(50),  index = True),
            sa.Column('job',    sa.PickleType()),
            mysql_engine = 'INNODB'
        ))

    @synchronized
    def install(self):
        """
        Installs (or upgrades) database tables.
        """
        self.metadata.create_all()

    @synchronized
    def uninstall(self):
        """
        Drops all tables from the database. Use with care.
        """
        self.metadata.drop_all()

    @synchronized
    def clear_database(self):
        """
        Deletes all rows from the job table, which is the only table
        used by this class. Use with care.
        """
        delete = self._table_map['job'].delete()
        delete.execute()

    def debug(self, debug = True):
        """
        Enable/disable debugging.

        @type  debug: bool
        @param debug: True to enable debugging.
        """
        self.engine.echo = debug

    def set_table_prefix(self, prefix):
        """
        Define a string that is prefixed to all table names in the database.

        @type  prefix: string
        @param prefix: The new prefix.
        """
        self._table_prefix = prefix
        self.__update_table_names()

    def get_table_prefix(self):
        """
        Returns the current database table prefix.

        @rtype:  string
        @return: The current prefix.
        """
        return self._table_prefix

    def __len__(self):
        """
        Returns the number of rows in the job table.
        """
        return self._table_map['job'].count().execute().fetchone()[0]

    def __contains__(self, item):
        """
        Returns True if has_id() reports the Python id() of the given item.
        """
        return self.has_id(id(item))

    def get_from_name(self, name):
        """
        Returns the item with the given name, or None if no such item
        is known.

        @type  name: string
        @param name: The value to look up in the name column.
        @rtype:  object|None
        @return: The 'job' column of the first matching row.
        """
        with self.condition:
            tbl_j = self._table_map['job']
            query = tbl_j.select(tbl_j.c.name == name)
            row   = query.execute().fetchone()
            if row is None:
                return None
            return row.job

    def has_id(self, item_id):
        """
        Returns True if the queue contains an item with the given id.

        @type  item_id: int
        @param item_id: The value to look up in the id column.
        @rtype:  bool
        @return: Whether at least one matching row exists.
        """
        tbl_j = self._table_map['job']
        query = tbl_j.select(tbl_j.c.id == item_id).count()
        return query.execute().fetchone()[0] > 0

    def task_done(self, item):
        """
        Marks the given item as no longer being worked on and forgets it.
        Raises KeyError if the item is not in the working set.
        """
        with self.condition:
            self.working.remove(item)
            self.all.remove(id(item))
            self.condition.notify_all()

    def append(self, item):
        """
        Adds the item to the right end of the queue.
        """
        with self.condition:
            self.queue.append(item)
            self.all.add(id(item))
            self.condition.notify_all()

    def appendleft(self, item, force = False):
        """
        Adds the item to the left end of the queue, or, if force is True,
        to the list of forced items.

        @type  force: bool
        @param force: Whether to put the item into the force list, which
                      next() serves before the regular queue.
        """
        with self.condition:
            if force:
                self.force.append(item)
            else:
                self.queue.appendleft(item)
            self.all.add(id(item))
            self.condition.notify_all()

    def prioritize(self, item, force = False):
        """
        Moves the item to the very left of the queue.

        Raises ValueError if the item is neither working, forced, nor
        in the queue.
        """
        with self.condition:
            # If the job is already running (or about to be forced),
            # there is nothing to be done.
            if item in self.working or item in self.force:
                return
            self.queue.remove(item)
            self.appendleft(item, force)
            self.condition.notify_all()

    def clear(self):
        """
        Resets the in-memory queue, force list, and the sleeping,
        working and known-item sets. Does not touch the database.
        """
        with self.condition:
            self.queue    = deque()
            self.force    = deque()
            self.sleeping = set()
            self.working  = set()
            self.all      = set()
            self.condition.notify_all()

    def stop(self):
        """
        Force the next() method to return while in another thread.
        The return value of next() will be None.
        """
        with self.condition:
            self.running = False
            self.condition.notify_all()

    def pause(self):
        """
        Makes next() block until unpause() (or stop()) is called.
        """
        with self.condition:
            self.paused = True
            self.condition.notify_all()

    def unpause(self):
        """
        Reverts the effect of pause().
        """
        with self.condition:
            self.paused = False
            self.condition.notify_all()

    def sleep(self, item):
        """
        Marks the given item as sleeping. Sleeping items are skipped by
        next() and are subtracted when next() counts occupied slots.
        """
        assert id(item) in self.all
        with self.condition:
            self.sleeping.add(item)
            self.condition.notify_all()

    def wake(self, item):
        """
        Removes the sleeping mark from the given item.
        """
        assert id(item) in self.all
        assert item in self.sleeping
        with self.condition:
            self.sleeping.remove(item)
            self.condition.notify_all()

    def wait_for_id(self, item_id):
        """
        Blocks for as long as has_id() reports the given id.
        """
        with self.condition:
            while self.has_id(item_id):
                self.condition.wait()

    def wait(self):
        """
        Waits for all currently running tasks to complete.
        """
        with self.condition:
            while self.working:
                self.condition.wait()

    def wait_all(self):
        """
        Waits for all queued and running tasks to complete.
        """
        with self.condition:
            while len(self) > 0:
                self.condition.wait()

    def with_lock(self, function, *args, **kwargs):
        """
        Calls the given function while holding the pipeline lock. The
        pipeline is passed as the first argument, followed by the given
        arguments. Returns whatever the function returns.
        """
        with self.condition:
            return function(self, *args, **kwargs)

    def set_max_working(self, max_working):
        """
        Changes the slot limit checked by next(), and wakes up waiters.

        @type  max_working: int
        @param max_working: The new limit; converted using int().
        """
        with self.condition:
            self.max_working = int(max_working)
            self.condition.notify_all()

    def get_max_working(self):
        """
        Returns the slot limit checked by next().
        """
        return self.max_working

    def get_working(self):
        """
        Returns a list (a copy) of the items currently being worked on.
        """
        return list(self.working)

    def _popleft_sleeping(self):
        """
        Removes the leading run of sleeping items from the queue and
        returns them as a list, in their queue order.
        """
        sleeping = []
        while self.queue and self.queue[0] in self.sleeping:
            sleeping.append(self.queue.popleft())
        return sleeping

    def _get_next(self):
        """
        Removes and returns the first item of the queue that is not
        preceded only by sleeping items, or None if there is none.
        Leading sleeping items stay in the queue in their original order.
        """
        # We need to leave sleeping items in the queue because else we
        # would not know their original position after they wake up.
        # So we need to temporarily remove sleeping items from the top of
        # the queue here.
        sleeping = self._popleft_sleeping()

        # Get the first non-sleeping item from the queue.
        try:
            item = self.queue.popleft()
        except IndexError:
            item = None

        # Re-insert sleeping items. deque.extendleft() pushes the items
        # one by one, which reverses them, so feed them in reverse to
        # restore the original order.
        self.queue.extendleft(reversed(sleeping))
        return item

    def next(self):
        """
        Blocks until an item may be worked on, marks it as working, and
        returns it.

        @rtype:  object|None
        @return: The next item, or None if stop() was called.
        """
        with self.condition:
            self.running = True
            while self.running:
                if self.paused:
                    self.condition.wait()
                    continue

                # Wait until enough slots are available. Sleeping items
                # do not occupy a slot, and pending forced items are
                # subtracted so that they are never held back here.
                occupied = len(self.working) \
                         - len(self.sleeping) \
                         - len(self.force)
                if occupied >= self.max_working:
                    self.condition.wait()
                    continue

                # Forced items are returned regardless of how many tasks
                # are already working.
                if self.force:
                    item = self.force.popleft()
                    self.working.add(item)
                    return item

                # Return the first non-sleeping task.
                item = self._get_next()
                if item is None:
                    self.condition.wait()
                    continue
                self.working.add(item)
                return item