def __init__(self, config, db_engine, *args, n_processes=1, n_db_processes=1, **kwargs):
    try:
        ForkingPickler.dumps(db_engine)
    except Exception as exc:
        raise TypeError(
            "multiprocessing is unable to pickle passed SQLAlchemy engine. "
            "use triage.create_engine instead when running MultiCoreExperiment: "
            "(e.g. from triage import create_engine)") from exc
    super(MultiCoreExperiment, self).__init__(config, db_engine, *args, **kwargs)
    if n_processes < 1:
        raise ValueError("n_processes must be 1 or greater")
    if n_db_processes < 1:
        raise ValueError("n_db_processes must be 1 or greater")
    if n_db_processes == 1 and n_processes == 1:
        logging.warning(
            "Both n_processes and n_db_processes were set to 1. "
            "If you only wish to use one process to run the experiment, "
            "consider using the SingleThreadedExperiment class instead")
    self.n_processes = n_processes
    self.n_db_processes = n_db_processes
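# A minimal, self-contained sketch of the fail-fast probe used above:
# ForkingPickler.dumps is tried on an object before any worker process is
# spawned, so an unpicklable argument surfaces as one clear error instead of
# a crash deep inside multiprocessing. `assert_picklable` is a hypothetical
# helper, not part of any of the libraries quoted here.
from multiprocessing.reduction import ForkingPickler

def assert_picklable(obj, hint=""):
    """Raise TypeError early if `obj` cannot cross a process boundary."""
    try:
        ForkingPickler.dumps(obj)
    except Exception as exc:
        raise TypeError(f"{type(obj).__name__} is not picklable. {hint}") from exc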
def test_pickle_categorical(self):
    from multiprocessing.reduction import ForkingPickler

    from lale.lib.rasl import Project

    c = categorical(5)
    p = Project(columns=None, drop_columns=categorical(10))
    _ = ForkingPickler.dumps(c)
    _ = ForkingPickler.dumps(p)
def test_stars_pickleable():
    """
    Verify that EPSFStars can be successfully pickled/unpickled for use
    with multiprocessing.
    """
    from multiprocessing.reduction import ForkingPickler

    # Doesn't need to actually contain anything useful
    stars = EPSFStars([1])
    # This should not blow up
    ForkingPickler.loads(ForkingPickler.dumps(stars))
def init_reductions():
    ForkingPickler.register(torch.cuda.Event, reduce_event)

    for t in torch._storage_classes:
        ForkingPickler.register(t, reduce_storage)

    for t in torch._tensor_classes:
        ForkingPickler.register(t, reduce_tensor)

    ForkingPickler.register(Variable, reduce_variable)
    ForkingPickler.register(Parameter, reduce_variable)
def test_multiprocessing(self, device=None, dtype=None):
    """multiprocessing sharing with 'device' and 'dtype'"""
    buf = io.BytesIO()
    t = MetaTensor([0.0, 0.0], device=device, dtype=dtype)
    if t.is_cuda:
        with self.assertRaises(NotImplementedError):
            ForkingPickler(buf).dump(t)
        return
    ForkingPickler(buf).dump(t)
    obj = ForkingPickler.loads(buf.getvalue())
    self.assertIsInstance(obj, MetaTensor)
    assert_allclose(obj.as_tensor(), t)
def init_reductions():
    ForkingPickler.register(torch.cuda.Event, reduce_event)

    for t in torch._storage_classes:
        ForkingPickler.register(t, reduce_storage)

    for t in torch._tensor_classes:
        ForkingPickler.register(t, reduce_tensor)

    # TODO: Maybe this should be in tensor_classes? :)
    ForkingPickler.register(torch.Tensor, reduce_tensor)
    ForkingPickler.register(torch.nn.parameter.Parameter, reduce_tensor)
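# Hedged sketch of what ForkingPickler.register does in the init_reductions
# functions above: it installs a custom (rebuild_fn, args) reducer for a type,
# so every queue and pipe in the process serializes that type through it.
# `Handle`, `reduce_handle`, and `rebuild_handle` are illustrative names only.
from multiprocessing.reduction import ForkingPickler

class Handle:
    def __init__(self, value):
        self.value = value

def rebuild_handle(value):
    # called on the receiving side to reconstruct the object
    return Handle(value)

def reduce_handle(handle):
    # same contract as __reduce__: return (callable, args) used to rebuild
    return rebuild_handle, (handle.value,)

ForkingPickler.register(Handle, reduce_handle)
print(ForkingPickler.loads(ForkingPickler.dumps(Handle(7))).value)  # 7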
def __init__(self, config, db_engine, *args, n_processes=1, n_bigtrain_processes=1, n_db_processes=1, **kwargs):
    """
    Args:
        config (dict)
        db_engine (sqlalchemy engine)
        n_processes (int) How many parallel processes to use for most CPU-bound tasks.
            Logistic regression and decision trees fall under this category.
            Usually good to set to the # of cores on the machine.
        n_bigtrain_processes (int) How many parallel processes to use for memory-intensive tasks.
            Random forests and extra trees fall under this category.
            Usually good to start at 1, but can be increased if you have available memory.
        n_db_processes (int) How many parallel processes to use for database IO-intensive tasks.
            Cohort creation, label creation, and feature creation fall under this category.
    """
    try:
        ForkingPickler.dumps(db_engine)
    except Exception as exc:
        raise TypeError(
            "multiprocessing is unable to pickle passed SQLAlchemy engine. "
            "use triage.create_engine instead when running MultiCoreExperiment: "
            "(e.g. from triage import create_engine)") from exc
    super(MultiCoreExperiment, self).__init__(config, db_engine, *args, **kwargs)
    if n_processes < 1:
        raise ValueError("n_processes must be 1 or greater")
    if n_db_processes < 1:
        raise ValueError("n_db_processes must be 1 or greater")
    if n_bigtrain_processes < 1:
        raise ValueError("n_bigtrain_processes must be 1 or greater")
    if n_db_processes == 1 and n_processes == 1 and n_bigtrain_processes == 1:
        logger.notice(
            "All of n_processes, n_db_processes and n_bigtrain_processes were set to 1. "
            "If you only wish to use one process to run the experiment, "
            "consider using the SingleThreadedExperiment class instead")
    self.n_processes = n_processes
    self.n_db_processes = n_db_processes
    self.n_bigtrain_processes = n_bigtrain_processes
    self.n_processes_lookup = {
        BatchKey.QUICKTRAIN: self.n_processes,
        BatchKey.BIGTRAIN: self.n_bigtrain_processes,
        BatchKey.MAYBETRAIN: self.n_processes,
    }
def init_reductions():
    if not _supported_check():
        return

    ForkingPickler.register(paddle.Tensor, reduce_tensor)
    ForkingPickler.register(paddle.fluid.core.VarBase, reduce_tensor)
    ForkingPickler.register(paddle.fluid.framework.ParamBase, reduce_tensor)
    ForkingPickler.register(paddle.fluid.core.LoDTensor, reduce_lodtensor)
def _feed(buffer, notempty, send_bytes, writelock, close, ignore_epipe, queue_sem):
    while True:
        try:
            with notempty:
                if not buffer:
                    notempty.wait()
            try:
                while True:
                    obj = buffer.popleft()
                    if obj is _sentinel:
                        debug('feeder thread got sentinel -- exiting')
                        close()
                        return
                    obj = ForkingPickler.dumps(obj)
                    if sys.platform == 'win32':
                        send_bytes(obj)
                    else:
                        with writelock:
                            send_bytes(obj)
            except IndexError:
                # popleft raises IndexError when the buffer is empty
                pass
        except Exception as e:
            if ignore_epipe and getattr(e, 'errno', 0) == errno.EPIPE:
                return
            # Since this runs in a daemon thread the resources it uses may
            # become unusable while the process is cleaning up. We ignore
            # errors which happen after the process has started to clean up.
            if is_exiting():
                info('error in queue thread: %s', e)
                return
            else:
                # Since the object has not been sent in the queue, we need to
                # decrease the size of the queue. The error acts as if the
                # object had been silently removed from the queue, and this
                # step is necessary to have a properly working queue.
                queue_sem.release()
                traceback.print_exc()
def worker_fn(
    worker_id: int,
    num_workers: int,
    dataset,
    batch_size: int,
    stack_fn: Callable,
    batch_queue: mp.Queue,
    terminate_event,
    exhausted_event,
):
    MPWorkerInfo.set_worker_info(
        num_workers=num_workers,
        worker_id=worker_id,
    )

    for batch in batcher(dataset, batch_size):
        stacked_batch = stack_fn(batch)
        try:
            if terminate_event.is_set():
                return

            buf = io.BytesIO()
            ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(
                (worker_id, stacked_batch))
            batch_queue.put(buf.getvalue())
        except (EOFError, BrokenPipeError):
            return

    exhausted_event.set()
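# Hedged consumer-side sketch for worker_fn above: the main process takes the
# raw bytes off batch_queue and decodes them with ForkingPickler.loads.
# `next_batch` is an illustrative helper, not part of the quoted library.
from multiprocessing.reduction import ForkingPickler

def next_batch(batch_queue, timeout=5.0):
    """Decode one (worker_id, stacked_batch) tuple enqueued by worker_fn."""
    worker_id, stacked_batch = ForkingPickler.loads(
        batch_queue.get(timeout=timeout))
    return worker_id, stacked_batch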
def _worker_fn(
    batch_size: int,
    batchify_fn: Callable,
    dtype: DType,
    is_train: bool,
    cyclic: bool,
    cycle_num: int,
    shuffle: bool,
    shuffle_buffer: multiprocessing.Manager().list,
    shuffle_buffer_length: int,
):
    """Function for processing data in worker process."""
    # initialize, or reset the iterator at each cycle
    if (_WorkerData.iterator_latest_reset_cycle < cycle_num) and (
            _WorkerData.iterator_latest_reset_cycle == 0 or not cyclic):
        _worker_reset_iterator(is_train, cyclic, cycle_num)

    # retrieve the samples that will be batched
    if not shuffle:
        batch_samples = list(
            itertools.islice(_WorkerData.dataset_iterator, batch_size))
    else:
        if len(shuffle_buffer) == 0:
            for sample in list(
                    itertools.islice(_WorkerData.dataset_iterator,
                                     shuffle_buffer_length)):
                shuffle_buffer.append(sample)
        next_samples = list(
            itertools.islice(_WorkerData.dataset_iterator, batch_size))
        batch_samples = []
        # O(batch_size) since every operation in the loop is O(1)
        for i in range(batch_size):
            idx = random.randint(0, shuffle_buffer_length - 1)
            batch_samples.append(shuffle_buffer[idx])
            shuffle_buffer[idx] = next_samples[i]

    # batch the samples, if there were any
    if batch_samples:
        success = True
        batch = batchify_fn(
            data=batch_samples, dtype=dtype, multi_processing=True)
    else:
        # the second time without being able to provide a batch we want to
        # delay calling them again; on first exhaustion they should not be
        # delayed, since they need to indicate depletion. Don't make the
        # penalty too high, since that delays rescheduling of non-empty iterators.
        if _WorkerData.iterator_exhausted_indicator:
            time.sleep(0.05)
        else:
            _WorkerData.iterator_exhausted_indicator = True
        success = False
        batch = None

    buf = io.BytesIO()
    ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(
        (success, MPWorkerInfo.worker_id, batch))
    return buf.getvalue()
def put(self, obj):
    obj = ForkingPickler.dumps(obj)
    if self._wlock is None:
        # writes to a message-oriented win32 pipe are atomic
        self._writer.send_bytes(obj)
    else:
        with self._wlock:
            self._writer.send_bytes(obj)
def _validate_nodes(cls, nodes: Iterable[Node]):
    """Ensure all tasks are serializable."""
    unserializable = []
    for node in nodes:
        try:
            ForkingPickler.dumps(node)
        except (AttributeError, PicklingError):
            unserializable.append(node)

    if unserializable:
        raise AttributeError(
            "The following nodes cannot be serialized: {}\nIn order to "
            "utilize multiprocessing you need to make sure all nodes are "
            "serializable, i.e. nodes should not include lambda "
            "functions, nested functions, closures, etc.\nIf you "
            "are using custom decorators ensure they are correctly using "
            "functools.wraps().".format(sorted(unserializable)))
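# Quick check of what the (AttributeError, PicklingError) clause above
# catches: a module-level lambda fails ForkingPickler.dumps, which is exactly
# how unserializable nodes are detected before any worker is started.
from multiprocessing.reduction import ForkingPickler
from pickle import PicklingError

try:
    ForkingPickler.dumps(lambda x: x)  # lambdas cannot be pickled by reference
except (AttributeError, PicklingError) as exc:
    print(f"cannot serialize: {exc}")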
def put(self, obj):
    # serialize the data before acquiring the lock
    obj = LokyPickler.dumps(obj)
    if self._wlock is None:
        # writes to a message-oriented win32 pipe are atomic
        self._writer.send_bytes(obj)
    else:
        with self._wlock:
            self._writer.send_bytes(obj)
def _worker_fn(samples, batchify_fn):
    """Function for processing data in worker process."""
    # each worker process has to fork a new MXIndexedRecordIO handle;
    # preserving the dataset as a global variable saves tons of overhead
    # and is safe in a new process
    global _worker_dataset
    batch = batchify_fn([_worker_dataset[i] for i in samples])
    buf = io.BytesIO()
    ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(batch)
    return buf.getvalue()
def _feed(buffer, notempty, send_bytes, writelock, close, ignore_epipe):
    debug('starting thread to feed data to pipe')
    nacquire = notempty.acquire
    nrelease = notempty.release
    nwait = notempty.wait
    bpopleft = buffer.popleft
    sentinel = _sentinel
    if sys.platform != 'win32':
        wacquire = writelock.acquire
        wrelease = writelock.release
    else:
        wacquire = None

    try:
        while 1:
            nacquire()
            try:
                if not buffer:
                    nwait()
            finally:
                nrelease()
            try:
                while 1:
                    obj = bpopleft()
                    if obj is sentinel:
                        debug('feeder thread got sentinel -- exiting')
                        close()
                        return

                    # serialize the data before acquiring the lock
                    obj = LokyPickler.dumps(obj)
                    if wacquire is None:
                        send_bytes(obj)
                    else:
                        wacquire()
                        try:
                            send_bytes(obj)
                        finally:
                            wrelease()
            except IndexError:
                pass
    except Exception as e:
        if ignore_epipe and getattr(e, 'errno', 0) == errno.EPIPE:
            return
        # Since this runs in a daemon thread the resources it uses
        # may become unusable while the process is cleaning up.
        # We ignore errors which happen after the process has
        # started to clean up.
        try:
            if is_exiting():
                info('error in queue thread: %s', e)
            else:
                import traceback
                traceback.print_exc()
        except Exception:
            pass
def _validate_catalog(cls, catalog: DataCatalog, pipeline: Pipeline):
    """Ensure that all data sets are serializable and that we do not have
    any non-proxied memory data sets being used as outputs, as their
    content will not be synchronized across threads.
    """
    data_sets = catalog._data_sets  # pylint: disable=protected-access

    unserializable = []
    for name, data_set in data_sets.items():
        if getattr(data_set, "_SINGLE_PROCESS", False):  # SKIP_IF_NO_SPARK
            unserializable.append(name)
            continue
        try:
            ForkingPickler.dumps(data_set)
        except (AttributeError, PicklingError):
            unserializable.append(name)

    if unserializable:
        raise AttributeError(
            "The following data sets cannot be used with multiprocessing: "
            "{}\nIn order to utilize multiprocessing you need to make sure "
            "all data sets are serializable, i.e. data sets should not make "
            "use of lambda functions, nested functions, closures etc.\nIf you "
            "are using custom decorators ensure they are correctly using "
            "functools.wraps().".format(sorted(unserializable))
        )

    memory_data_sets = []
    for name, data_set in data_sets.items():
        if (
            name in pipeline.all_outputs()
            and isinstance(data_set, MemoryDataSet)
            and not isinstance(data_set, BaseProxy)
        ):
            memory_data_sets.append(name)

    if memory_data_sets:
        raise AttributeError(
            "The following data sets are memory data sets: {}\n"
            "ParallelRunner does not support output to externally created "
            "MemoryDataSets".format(sorted(memory_data_sets))
        )
def test_objective_pickle(self):
    # can you pickle the objective function?
    pkl = pickle.dumps(self.objective)
    pickle.loads(pkl)

    # check the ForkingPickler as well.
    if hasattr(ForkingPickler, 'dumps'):
        pkl = ForkingPickler.dumps(self.objective)
        pickle.loads(pkl)

    # can you pickle with an extra function present?
    self.objective.lnprob_extra = lnprob_extra
    pkl = pickle.dumps(self.objective)
    pickle.loads(pkl)

    # check the ForkingPickler as well.
    if hasattr(ForkingPickler, 'dumps'):
        pkl = ForkingPickler.dumps(self.objective)
        pickle.loads(pkl)
def put(self, *args: T) -> None:
    """
    Put zero or more objects into the queue, contiguously.

    Raises BrokenPipeError if the receiving half has hung up.
    """
    if args:
        send = [ForkingPickler.dumps(arg) for arg in args]
        with self._write_lock:
            while send:
                self._writer.send_bytes(send.pop(0))
def get(self, timeout: typing.Optional[float] = None) -> T:
    """
    Get one object from the queue, raising queue.Empty if unavailable.

    Raises EOFError on an exhausted queue whenever the sending half has hung up.
    """
    with self._read_lock:
        if self._reader.poll(timeout):
            recv = self._reader.recv_bytes()
        else:
            raise queue.Empty
    return ForkingPickler.loads(recv)
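# Hedged usage sketch for the put/get pair above: ForkingPickler.dumps emits
# standard pickle bytes, so they can travel over any bytes channel (here a
# one-way multiprocessing.Pipe) and come back via ForkingPickler.loads.
import multiprocessing as mp
from multiprocessing.reduction import ForkingPickler

reader, writer = mp.Pipe(duplex=False)
writer.send_bytes(ForkingPickler.dumps({"step": 1}))
print(ForkingPickler.loads(reader.recv_bytes()))  # {'step': 1}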
def init_reductions():
    ForkingPickler.register(torch.cuda.Event, reduce_event)

    for t in torch._storage_classes:
        ForkingPickler.register(t, reduce_storage)

    for t in torch._tensor_classes:
        ForkingPickler.register(t, reduce_tensor)
async def _start(self, loop):
    import inspect
    data = {
        'path': sys.path.copy(),
        'impl': bytes(ForkingPickler.dumps(self)),
        'main': inspect.getfile(sys.modules['__main__']),
        'authkey': bytes(current_process().authkey)
    }
    self.process = await asyncio.create_subprocess_exec(
        sys.executable, SUBPROCESS, stdin=asyncio.subprocess.PIPE, loop=loop)
    await self.process.communicate(pickle.dumps(data))
def _worker_fn(samples, transform_fn, batchify_fn):
    """Function for processing data in worker process."""
    # each worker process has to fork a new MXIndexedRecordIO handle;
    # preserving the dataset as a global variable saves tons of overhead
    # and is safe in a new process
    global _worker_dataset
    t_dataset = _worker_dataset.transform(transform_fn)
    batch = []
    for i in samples:
        try:
            data = t_dataset[i]
            batch.append(data)
        except Exception:
            # skip samples whose transform fails
            continue
    batch = batchify_fn(batch)
    buf = io.BytesIO()
    ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(batch)
    return buf.getvalue()
def _worker_fn(
    batch_size: int,
    batchify_fn: Callable,
    dtype: DType,
    is_train: bool,
    shuffle: bool,
    num_batches_for_shuffling: int,
    cyclic: bool,
    cycle_num: int,
):
    """Function for processing data in worker process."""
    # initialize, or reset the iterator at each cycle
    if (_WorkerData.iterator_latest_reset_cycle < cycle_num) and (
            _WorkerData.iterator_latest_reset_cycle == 0 or not cyclic):
        _worker_reset_iterator(
            is_train, cyclic, cycle_num, num_batches_for_shuffling)

    # retrieve the samples that will be batched
    batch_samples = list(
        itertools.islice(_WorkerData.dataset_iterator, batch_size))
    if shuffle:
        random.shuffle(batch_samples)

    # batch the samples, if there were any
    if batch_samples:
        success = True
        batch = batchify_fn(
            data=batch_samples, dtype=dtype, multi_processing=True)
    else:
        # the second time without being able to provide a batch we want to
        # delay calling them again; on first exhaustion they should not be
        # delayed, since they need to indicate depletion. Don't make the
        # penalty too high, since that delays rescheduling of non-empty iterators.
        if _WorkerData.iterator_exhausted_indicator:
            time.sleep(0.05)
        else:
            _WorkerData.iterator_exhausted_indicator = True
        success = False
        batch = None

    buf = io.BytesIO()
    ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(
        (success, MPWorkerInfo.worker_id, batch))
    return buf.getvalue()
def _worker_fn(
    batch_size: int,
    batchify_fn: Callable,
    dtype: DType,
    is_train: bool,
    shuffle: bool,
    cyclic: bool,
    cycle_num: int,
):
    """Function for processing data in worker process."""
    # initialize, or reset the iterator at each cycle
    assert isinstance(_WorkerData.iterator_latest_reset_cycle, int)
    if (_WorkerData.iterator_latest_reset_cycle < cycle_num) and (
            _WorkerData.iterator_latest_reset_cycle == 0 or not cyclic):
        _worker_reset_iterator(is_train, cyclic, cycle_num)

    assert isinstance(
        _WorkerData.dataset_iterator, Iterable
    ), f"Dataset not Iterable: {type(_WorkerData.dataset_iterator)}."
    transformed_data = list(
        itertools.islice(_WorkerData.dataset_iterator, batch_size))

    if shuffle:
        random.shuffle(transformed_data)

    if transformed_data:
        success = True
        batch = batchify_fn(
            data=transformed_data, dtype=dtype, multi_processing=True)
    else:
        # the second time without being able to provide a batch we want to
        # delay calling them again; on first exhaustion they should not be
        # delayed, since they need to indicate depletion
        if _WorkerData.iterator_exhausted_indicator:
            time.sleep(0.1)
        else:
            _WorkerData.iterator_exhausted_indicator = True
        success = False
        batch = None

    buf = io.BytesIO()
    ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(
        (success, MPWorkerInfo.worker_id, batch))
    return buf.getvalue()
def data_received(self, data):
    # I'm worried this will be slower...
    data = self._data + data
    while True:
        if len(data) < 4:
            # not enough bytes for the length header yet
            self._data = data
            break
        size, = struct.unpack("!i", data[:4])
        if len(data) < 4 + size:
            # wait for the rest of the frame
            self._data = data
            print('breaking')
            break
        node = ForkingPickler.loads(data[4:4 + size])  # ARE YOU SERIOUS
        if self.render_:
            # FIXME either of these cause panda related segfaults
            # only on athena >_<
            # on the other hand, doesn't exit as fast on luz
            #node.reparentTo(self.collRoot)
            self.coll_add_queue.append(node)
        #self.nodes.append(node)
        self.cache[self.request_hash] = self.geom, node, self.ui
        data = data[4 + size:]
        if not data:
            self._data = b''
            break
def compress_task(sock, handler_modulename, handler_classname, port):
    """
    Compress all the inputs required for a task into a tuple

    Args:
        sock (socket): Receiving socket
        handler_modulename (str): Module name of the handler to be used
        handler_classname (str): Class name of the handler to be used
        port (int): incoming port

    Returns:
        tuple: All information suitable to put as a task in a queue
    """
    # Pickle the socket. This is required to pass the socket in multiprocessing.
    buf = BytesIO()
    ForkingPickler(buf).dump(sock)
    pickled_socket = buf.getvalue()

    task = pickled_socket, handler_modulename, handler_classname, port
    return task
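# Hedged sketch of the receiving side of compress_task: in the worker the
# pickled socket is restored with pickle.loads, since ForkingPickler writes
# standard pickle data and its registered reducers handle duplicating the
# socket handle between related processes. `expand_task` is an illustrative
# name, not taken from the quoted code.
import pickle

def expand_task(task):
    """Inverse of compress_task: rebuild the socket inside the worker."""
    pickled_socket, handler_modulename, handler_classname, port = task
    sock = pickle.loads(pickled_socket)
    return sock, handler_modulename, handler_classname, port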
def get(self, block=True, timeout=None):
    if block and timeout is None:
        with self._rlock:
            res = self._recv_bytes()
        self._sem.release()
    else:
        if block:
            deadline = time.monotonic() + timeout
        if not self._rlock.acquire(block, timeout):
            raise Empty
        try:
            if block:
                # subtract the time spent acquiring the process lock _rlock
                timeout = deadline - time.monotonic()
                if not self._poll(timeout):
                    raise Empty
            elif not self._poll():
                raise Empty
            res = self._recv_bytes()
            self._sem.release()
        finally:
            self._rlock.release()
    return ForkingPickler.loads(res)
def get(self, block=True, timeout=None):
    if block and timeout is None:
        with self._rlock:
            res = self._recv_bytes()
        self._sem.release()
    else:
        if block:
            deadline = time.time() + timeout
        if not self._rlock.acquire(block, timeout):
            raise Empty
        try:
            if block:
                timeout = deadline - time.time()
                if timeout < 0 or not self._poll(timeout):
                    raise Empty
            elif not self._poll():
                raise Empty
            res = self._recv_bytes()
            self._sem.release()
        finally:
            self._rlock.release()
    # unserialize the data after having released the lock
    return LokyPickler.loads(res)
def process_data(self):
    while True:
        if len(self.__data) < 4:
            # not enough bytes for the length header yet
            yield from asyncio.sleep(1, loop=self.event_loop)
            continue
        size, = struct.unpack("!i", self.__data[:4])
        if len(self.__data) < 4 + size:
            # wait for the rest of the frame
            print('yielding')
            yield from asyncio.sleep(1, loop=self.event_loop)
            continue
        #yield self.__data[4:4+size]
        node = ForkingPickler.loads(self.__data[4:4 + size])
        self.cache[self.request_hash] = self.geom, node, self.ui
        print(node)
        if self.render_:
            # FIXME either of these cause panda related segfaults
            # only on athena >_<
            # on the other hand, doesn't exit as fast on luz
            node.reparentTo(self.collRoot)
            #self.coll_add_queue.append(node)
        #self.nodes.append(node)
        self.__data = self.__data[4 + size:]
        if not self.__data:
            print('I only get here once!')
            break
def rebuild_ndarray(pid, fd, shape, dtype):
    """Rebuild ndarray from pickled shared memory"""
    # (head reconstructed from the matching reduce_ndarray below: the
    # Python 2 branch rebuilds the duplicated handle, Python 3 detaches it)
    if sys.version_info[0] == 2:
        fd = multiprocessing.reduction.rebuild_handle(fd)
    else:
        fd = fd.detach()
    return nd.NDArray(nd.ndarray._new_from_shared_mem(pid, fd, shape, dtype))


def reduce_ndarray(data):
    """Reduce ndarray to shared memory handle"""
    # keep a local ref before duplicating fd
    data = data.as_in_context(context.Context('cpu_shared', 0))
    pid, fd, shape, dtype = data._to_shared_mem()
    if sys.version_info[0] == 2:
        fd = multiprocessing.reduction.reduce_handle(fd)
    else:
        fd = multiprocessing.reduction.DupFd(fd)
    return rebuild_ndarray, (pid, fd, shape, dtype)


ForkingPickler.register(nd.NDArray, reduce_ndarray)


class ConnectionWrapper(object):
    """Connection wrapper for multiprocessing that supports sending
    NDArray via shared memory."""

    def __init__(self, conn):
        self._conn = conn

    def send(self, obj):
        """Send object"""
        buf = io.BytesIO()
        ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(obj)
        self.send_bytes(buf.getvalue())
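# Hedged sketch of the matching receive path for ConnectionWrapper.send above:
# ForkingPickler writes standard pickle data, so the reader only needs
# pickle.loads. Shown in isolation, assuming the wrapper forwards recv_bytes
# to the underlying connection.
import pickle

def recv(self):
    """Receive an object sent by ConnectionWrapper.send."""
    buf = self.recv_bytes()
    return pickle.loads(buf)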