def _state_reduce(self, obj):
    """Pickle *obj* via this project's metadata-based reduce protocol.

    Reads the object's ``__METADATA__`` mapping for the reconstruction
    callable (``'class'``) and its arguments (``'initargs'``), emits either
    a NEWOBJ or REDUCE opcode sequence, then a BUILD with the remaining
    instance state.

    # NOTE(review): `obj.__dict__` minus '__METADATA__' is used as state;
    # assumes all picklable state lives in __dict__ — confirm for slotted types.
    """
    # FIXME: this code is not as complete as pickle's reduce
    # handling code and is likely to not work in all cases.
    md = obj.__METADATA__
    func = md.get('class')          # reconstruction callable
    func_md = func.__METADATA__
    args = md.get('initargs')       # positional args for func
    state = dict(obj.__dict__)
    state.pop('__METADATA__')       # metadata is rebuilt on load, not state
    # This API is called by some subclasses
    # Assert that args is a tuple or None
    if not isinstance(args, tuple):
        if args is None:
            # A hack for Jim Fulton's ExtensionClass, now deprecated.
            # See load_reduce()
            warnings.warn("__basicnew__ special case is deprecated",
                          DeprecationWarning)
        else:
            raise PicklingError(
                "args from reduce() should be a tuple")
    # Assert that func is callable
    #if not callable(func):
    #    raise PicklingError("func from reduce should be callable")
    save = self.save
    write = self.write
    # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
    if self.proto >= 2 and func_md.get("name", "") == "__newobj__":
        # FIXME: this is unlikely to work.
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        # NEWOBJ takes cls and the *remaining* args: cls.__new__(cls, *args)
        args = args[1:]
        save(cls)
        save(args)
        write(NEWOBJ)
    else:
        save(func)
        save(args)
        write(REDUCE)
    # Memoize before BUILD so self-referential state can resolve the object.
    if obj is not None:
        self.memoize(obj)
    if state is not None:
        if '__setstate_data__' in state:
            # Subclass hook: explicit payload replaces the raw __dict__ state.
            data = state.pop('__setstate_data__')
            save(data)
        else:
            save(state)
        write(BUILD)
def save_pandas_object(self, obj):
    """Pickle a pandas object, stripping transient cache attributes.

    Prefers the object's own ``__getstate__`` (handled by the normal save
    path), otherwise falls back to ``__reduce_ex__``/``__reduce__`` and
    removes pandas-internal cache entries from the reduce state before
    delegating to ``save_reduce``.

    Raises:
        PicklingError: if the object exposes no reduce protocol or the
            reduce result is neither a string nor a tuple.
    """
    if hasattr(obj, '__getstate__'):
        self.save(obj, bypass_dispatch=True)
        return
    reduce = getattr(obj, "__reduce_ex__", None)
    if reduce is not None:
        rv = reduce(self.proto)
    else:
        reduce = getattr(obj, "__reduce__", None)
        if reduce is not None:
            rv = reduce()
        else:
            raise PicklingError("Can't pickle %r object: %r" %
                                (type(obj).__name__, obj))
    # Assert that reduce() returned a tuple
    if isinstance(rv, str):
        self.save_global(obj, rv)
        return
    if not isinstance(rv, tuple):
        raise PicklingError("%s must return string or tuple" % reduce)
    # BUG FIX: the original tested `isinstance(rv, dict)`, but rv is known
    # to be a tuple here, so the cache-stripping below could never run.
    # The intended test is on the state slot rv[2].
    if len(rv) > 2 and rv[2] is not None and isinstance(rv[2], dict):
        # Drop pandas-internal caches that should not be persisted.
        for key in ('_cache', '_cacher', '_ordered'):
            if key in rv[2]:
                del rv[2][key]
    self.save_reduce(obj=obj, *rv)
def save_reduce(coder, func, args, state=None, listitems=None, dictitems=None,
                obj=None):
    """Encode a pickle reduce() tuple into a keyed NSCoder.

    Validates that *args* is a tuple and *func* is callable, then writes
    the REDUCE opcode and each component under its well-known key.
    """
    if not isinstance(args, TupleType):
        raise PicklingError("args from reduce() should be a tuple")
    if not callable(func):
        raise PicklingError("func from reduce should be callable")
    # Materialize the optional iterators up front; None stays None.
    items_payload = None if listitems is None else list(listitems)
    dict_payload = None if dictitems is None else dict(dictitems)
    coder.encodeInt_forKey_(kOP_REDUCE, kKIND)
    coder.encodeObject_forKey_(func, kFUNC)
    coder.encodeObject_forKey_(args, kARGS)
    coder.encodeObject_forKey_(items_payload, kLIST)
    coder.encodeObject_forKey_(dict_payload, kDICT)
    coder.encodeObject_forKey_(state, kSTATE)
def save_global(coder, obj, name=None):
    """Encode a reference to a module-level object (class/function) by name.

    Verifies the object can actually be re-imported as ``module.name`` and
    is the same object, then writes either an extension-registry code or the
    (module, name) pair.

    Raises:
        PicklingError: if the object cannot be found at, or does not match,
            its advertised module/name location.
    """
    if name is None:
        name = obj.__name__
    module = getattr(obj, "__module__", None)
    if module is None:
        # Fall back to scanning sys.modules for the defining module.
        module = whichmodule(obj, name)
    try:
        __import__(module)
        mod = sys.modules[module]
        klass = getattr(mod, name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError("Can't pickle %r: it's not found as %s.%s" %
                            (obj, module, name))
    else:
        # Identity check: unpickling would yield module.name, so it must be
        # the very object we were asked to pickle.
        if klass is not obj:
            raise PicklingError(
                "Can't pickle %r: it's not the same object as %s.%s" %
                (obj, module, name))
    # copy_reg extension registry allows encoding well-known globals as ints.
    code = copy_reg._extension_registry.get((module, name))
    if code:
        coder.encodeInt_forKey_(kOP_GLOBAL_EXT, kKIND)
        coder.encodeInt_forKey_(code, kCODE)
    else:
        coder.encodeInt_forKey_(kOP_GLOBAL, kKIND)
        coder.encodeObject_forKey_(unicode(module), kMODULE)
        coder.encodeObject_forKey_(unicode(name), kNAME)
def save_global(self, obj, name=None):
    """Pickle a global (class/function) reference, resolving it through
    ``self.importer`` instead of the regular import machinery.

    Raises:
        PicklingError: if the importer cannot locate or match the object,
            or (protocol < 3) the module/name is not ASCII-encodable.
    """
    # unfortunately the pickler code is factored in a way that
    # forces us to copy/paste this function. The only change is marked
    # CHANGED below.
    write = self.write
    memo = self.memo  # kept from the upstream copy; not used below

    # CHANGED: import module from module environment instead of __import__
    try:
        module_name, name = self.importer.get_name(obj, name)
    except (ObjNotFoundError, ObjMismatchError) as err:
        raise PicklingError(f"Can't pickle {obj}: {str(err)}") from None

    module = self.importer.import_module(module_name)
    # parent is the object holding the final attribute (for dotted names).
    _, parent = _getattribute(module, name)
    # END CHANGED

    if self.proto >= 2:
        # Extension registry: well-known globals encode as small ints.
        code = _extension_registry.get((module_name, name))
        if code:
            assert code > 0
            if code <= 0xFF:
                write(EXT1 + pack("<B", code))
            elif code <= 0xFFFF:
                write(EXT2 + pack("<H", code))
            else:
                write(EXT4 + pack("<i", code))
            return
    lastname = name.rpartition(".")[2]
    if parent is module:
        name = lastname
    # Non-ASCII identifiers are supported only with protocols >= 3.
    if self.proto >= 4:
        self.save(module_name)
        self.save(name)
        write(STACK_GLOBAL)
    elif parent is not module:
        # Dotted attribute that GLOBAL can't express: rebuild via getattr.
        self.save_reduce(getattr, (parent, lastname))
    elif self.proto >= 3:
        write(GLOBAL + bytes(module_name, "utf-8") + b"\n" +
              bytes(name, "utf-8") + b"\n")
    else:
        if self.fix_imports:
            # Map Python 3 names back to their Python 2 equivalents.
            r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
            r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
            if (module_name, name) in r_name_mapping:
                module_name, name = r_name_mapping[(module_name, name)]
            elif module_name in r_import_mapping:
                module_name = r_import_mapping[module_name]
        try:
            write(GLOBAL + bytes(module_name, "ascii") + b"\n" +
                  bytes(name, "ascii") + b"\n")
        except UnicodeEncodeError:
            raise PicklingError(
                "can't pickle global identifier '%s.%s' using "
                "pickle protocol %i" % (module, name, self.proto)) from None
    self.memoize(obj)
def pyobjectEncode(self, coder):
    """Encode an arbitrary Python object into *coder*.

    Dispatch order mirrors pickle.Pickler.save: builtin dispatch table,
    classes (encoded by reference), copyreg.dispatch_table, then the
    object's own ``__reduce_ex__``/``__reduce__``.

    Raises:
        PicklingError: if no reduce protocol is available or its result
            is malformed.
    """
    t = type(self)

    # Find builtin support
    f = encode_dispatch.get(t)
    if f is not None:
        f(coder, self)
        return

    # Check for a class with a custom metaclass
    # XXX: pickle.py catches TypeError here, that's for
    # compatibility with ancient versions of Boost
    # (before Python 2.2) and is not needed here.
    issc = issubclass(t, type)
    if issc:
        save_global(coder, self)
        return

    # Check copyreg.dispatch_table
    reduce = copyreg.dispatch_table.get(t)
    if reduce is not None:
        rv = reduce(self)
    else:
        reduce = getattr(self, "__reduce_ex__", None)
        if reduce is not None:
            rv = reduce(2)
        else:  # pragma: no cover
            # This path will never be used because object implements
            # __reduce_ex__ (at least in python2.6 and later)
            # BUG FIX: the original assigned the bound method to `rv`
            # (``rv = getattr(...)``) and then tested the stale `reduce`,
            # so __reduce__ was never actually invoked on this path.
            reduce = getattr(self, "__reduce__", None)
            if reduce is not None:
                rv = reduce()
            else:
                raise PicklingError("Can't pickle %r object: %r" %
                                    (t.__name__, self))
    if type(rv) is str:
        save_global(coder, self, rv)
        return
    if type(rv) is not tuple:
        raise PicklingError("%s must return string or tuple" % reduce)
    l = len(rv)
    if not (2 <= l <= 5):
        raise PicklingError("Tuple returned by %s must have two to "
                            "five elements" % reduce)
    save_reduce(coder, *rv)
def pyobjectEncode(self, coder):
    """Encode an arbitrary Python object into *coder* (Python 2 variant).

    Dispatch order mirrors pickle.Pickler.save: builtin dispatch table,
    classes (encoded by reference), copy_reg.dispatch_table, then the
    object's own ``__reduce_ex__``/``__reduce__``.

    Raises:
        PicklingError: if no reduce protocol is available or its result
            is malformed.
    """
    t = type(self)

    # Find builtin support
    f = encode_dispatch.get(t)
    if f is not None:
        f(coder, self)
        return

    # Check for a class with a custom metaclass
    try:
        issc = issubclass(t, TypeType)
    except TypeError:
        # Some exotic types are not acceptable as issubclass() arguments.
        issc = 0
    if issc:
        save_global(coder, self)
        return

    # Check copy_reg.dispatch_table
    reduce = copy_reg.dispatch_table.get(t)
    if reduce is not None:
        rv = reduce(self)
    else:
        reduce = getattr(self, "__reduce_ex__", None)
        if reduce is not None:
            rv = reduce(2)
        else:
            # BUG FIX: the original assigned the bound method to `rv`
            # (``rv = getattr(...)``) and then tested the stale `reduce`,
            # so __reduce__ was never actually invoked on this path.
            reduce = getattr(self, "__reduce__", None)
            if reduce is not None:
                rv = reduce()
            else:
                raise PicklingError("Can't pickle %r object: %r" %
                                    (t.__name__, self))
    if type(rv) is StringType:
        # BUG FIX: pass the object *and* the global name. The original
        # called save_global(coder, rv), which treated the name string
        # itself as the object to encode (cf. the sibling implementation).
        save_global(coder, self, rv)
        return
    if type(rv) is not TupleType:
        raise PicklingError("%s must return string or tuple" % reduce)
    l = len(rv)
    if not (2 <= l <= 5):
        raise PicklingError("Tuple returned by %s must have two to "
                            "five elements" % reduce)
    save_reduce(coder, *rv)
def __reduce__(self) -> None:
    """Refuse pickling: lint rule reports must never be serialized."""
    message = (
        "Lint rule reports are potentially very complex objects. They can contain "
        "a syntax tree or an entire module's source code. They should not be "
        "pickled (or returned by a multiprocessing worker). Instead, extract "
        "the fields you care about, and pickle those."
    )
    raise PicklingError(message)
def likelihood_contour_data(log_likelihood, x_min, x_max, y_min, y_max,
                            n_sigma=1, steps=20, threads=1, pool=None):
    r"""Generate data required to plot coloured confidence contours (or bands)
    given a log likelihood function.

    Parameters:

    - `log_likelihood`: function returning the logarithm of the likelihood.
      Can e.g. be the method of the same name of a FastFit instance.
    - `x_min`, `x_max`, `y_min`, `y_max`: data boundaries
    - `n_sigma`: plot confidence level corresponding to this number of standard
      deviations. Either a number (defaults to 1) or a tuple to plot several
      contours.
    - `steps`: number of grid steps in each dimension (total computing time is
      this number squared times the computing time of one `log_likelihood` call!)
    - `threads`: number of threads, defaults to 1. If greater than one,
      computation of z values will be done in parallel.
    - `pool`: an instance of `multiprocessing.Pool` (or a compatible
      implementation, e.g. from `multiprocess` or `schwimmbad`). Overrides the
      `threads` argument.

    Returns a dict with keys 'x', 'y', 'z' (meshgrid-shaped arrays; z is
    the chi-square relative to the grid minimum) and 'levels' (delta
    chi-square values for the requested contours).
    """
    _x = np.linspace(x_min, x_max, steps)
    _y = np.linspace(y_min, y_max, steps)
    x, y = np.meshgrid(_x, _y)
    if threads == 1:
        @np.vectorize
        def chi2_vect(x, y):  # needed for evaluation on meshgrid
            return -2 * log_likelihood([x, y])
        z = chi2_vect(x, y)
    else:
        # Flatten the grid into a list of (x, y) points for pool.map.
        xy = np.array([x, y]).reshape(2, steps**2).T
        pool = pool or Pool(threads)
        try:
            z = -2 * np.array(pool.map(log_likelihood, xy)).reshape(
                (steps, steps))
        except PicklingError:
            pool.close()
            raise PicklingError(
                "When using more than 1 thread, the "
                "log_likelihood function must be picklable; "
                "in particular, you cannot use lambda expressions.")
        # NOTE(review): a caller-supplied `pool` is closed/joined here too —
        # confirm callers do not intend to reuse it afterwards.
        pool.close()
        pool.join()
    z = z - np.min(z)  # subtract the best fit point (on the grid)
    # get the correct values for 2D confidence/credibility contours for n sigma
    if isinstance(n_sigma, Number):
        levels = [delta_chi2(n_sigma, dof=2)]
    else:
        levels = [delta_chi2(n, dof=2) for n in n_sigma]
    return {'x': x, 'y': y, 'z': z, 'levels': levels}
def _on_queue_feeder_error(self, e, obj):
    """Handle a failure while feeding *obj* into the call queue.

    For `_CallItem`s, translate the low-level error into a user-facing
    exception (with the remote traceback attached as the cause), fail the
    corresponding future, and wake the manager thread. Anything else is
    delegated to the parent queue's handler.
    """
    if isinstance(obj, _CallItem):
        # format traceback only works on python3
        if isinstance(e, struct.error):
            # struct.error from send_bytes means the pickled payload
            # exceeded the OS limit, not that pickling itself failed.
            raised_error = RuntimeError(
                "The task could not be sent to the workers as it is too "
                "large for `send_bytes`.")
        else:
            raised_error = PicklingError(
                "Could not pickle the task to send it to the workers.")
        tb = traceback.format_exception(type(e), e,
                                        getattr(e, "__traceback__", None))
        # Attach the feeder-side traceback so it surfaces to the caller.
        raised_error = set_cause(raised_error,
                                 _RemoteTraceback(''.join(tb)))
        work_item = self.pending_work_items.pop(obj.work_id, None)
        self.running_work_items.remove(obj.work_id)
        # work_item can be None if another process terminated. In this
        # case, the executor_manager_thread fails all work_items with
        # BrokenProcessPool
        if work_item is not None:
            work_item.future.set_exception(raised_error)
            del work_item
        self.thread_wakeup.wakeup()
    else:
        super(_SafeQueue, self)._on_queue_feeder_error(e, obj)
def joblib_parmap(func, generator):
    """Parallel map over *generator* using joblib.

    Because joblib pickles its input arguments, *func* cannot be a
    dynamically generated (unpicklable) function.

    Raises:
        PicklingError: if joblib's `delayed` rejects *func*.
    """
    try:
        wrapped = delayed(func)
    except TypeError as e:
        raise PicklingError(e)
    return joblib_run(wrapped(element) for element in generator)
def get_texts(self):
    """Iterate over the Wikipedia dump, yielding one tokenized article at
    a time as ``(tokens, (pageid, title))``.

    Articles are tokenized in a worker pool; redirects, short stubs and
    ignored namespaces are pruned. On exit, caches the number of yielded
    articles in ``self.length``.
    """
    logger = logging.getLogger(__name__)
    # (kept, kept-before-pruning) counters for articles and token positions
    articles, articles_all = 0, 0
    positions, positions_all = 0, 0
    tokenization_params = (self.tokenizer_func, self.token_min_len,
                           self.token_max_len, self.lower)
    # Lazy stream of work items; nothing is read until the pool consumes it.
    texts = (
        (text, self.lemmatize, title, pageid, tokenization_params)
        for title, text, pageid in extract_pages(
            bz2.BZ2File(self.fname), self.filter_namespaces,
            self.filter_articles))
    pool = multiprocessing.Pool(self.processes, init_to_ignore_interrupt)
    try:
        # process the corpus in smaller chunks of docs,
        # because multiprocessing.Pool
        # is dumb and would load the entire input into RAM at once...
        for group in gensim.utils.chunkize(texts,
                                           chunksize=10 * self.processes,
                                           maxsize=1):
            for tokens, title, pageid in pool.imap(_process_article, group):
                articles_all += 1
                positions_all += len(tokens)
                # article redirects and short stubs are pruned here
                if len(tokens) < self.article_min_tokens or \
                        any(title.startswith(ignore + ':')
                            for ignore in IGNORED_NAMESPACES):
                    continue
                articles += 1
                positions += len(tokens)
                yield (tokens, (pageid, title))
    except KeyboardInterrupt:
        # User abort: report partial progress instead of propagating.
        logger.warn(
            "user terminated iteration over Wikipedia corpus after %i"
            " documents with %i positions "
            "(total %i articles, %i positions before pruning articles"
            " shorter than %i words)",
            articles, positions, articles_all, positions_all,
            ARTICLE_MIN_WORDS)
    except PicklingError as exc:
        # The filter function could not be shipped to the worker processes.
        raise_from(
            PicklingError(
                'Can not send filtering function {} to multiprocessing, '
                'make sure the function can be pickled.'.format(
                    self.filter_articles)),
            exc)
    else:
        logger.info(
            "finished iterating over Wikipedia corpus of %i "
            "documents with %i positions "
            "(total %i articles, %i positions before pruning articles"
            " shorter than %i words)",
            articles, positions, articles_all, positions_all,
            ARTICLE_MIN_WORDS)
        self.length = articles  # cache corpus length
    finally:
        pool.terminate()
def __str__( self ):
    """Render the base PicklingError message plus a short linearisation
    stack trace (the three most recent entries, most recent first).
    """
    base = PicklingError.__str__( self )
    if hasattr( self, 'stack' ):
        # BUG FIX: the original built a reversed copy of the *first* three
        # entries (self.stack[:3]) and then discarded it, formatting
        # self.stack[-3:] unreversed instead. Per the dead code's intent,
        # show the last three entries in most-recent-first order.
        stack = self.stack[-3:]
        stack.reverse()
        base = "%s\nLinearisation Stack:\n\t%s"%(
            base,
            "\n\t".join(map(repr,stack))
        )
    return base
def __getstate__(self):
    """Explicitly state that clients are not pickleable."""
    lines = [
        "Pickling client objects is explicitly not supported.",
        "Clients have non-trivial state that is local and unpickleable.",
    ]
    raise PicklingError("\n".join(lines))
def save_reduce(coder, func, args, state=None, listitems=None, dictitems=None,
                obj=None):
    """Encode a pickle reduce() tuple into an NSCoder.

    Uses keyed archiving when the coder supports it, otherwise falls back
    to positional (non-keyed) encoding in the same component order.
    """
    if not isinstance(args, tuple):
        raise PicklingError("args from reduce() should be a tuple")
    if not callable(func):
        raise PicklingError("func from reduce should be callable")

    # Materialize optional iterators once; None passes through unchanged.
    items_payload = None if listitems is None else list(listitems)
    dict_payload = None if dictitems is None else dict(dictitems)

    if coder.allowsKeyedCoding():
        coder.encodeInt_forKey_(kOP_REDUCE, kKIND)
        coder.encodeObject_forKey_(func, kFUNC)
        coder.encodeObject_forKey_(args, kARGS)
        coder.encodeObject_forKey_(items_payload, kLIST)
        coder.encodeObject_forKey_(dict_payload, kDICT)
        coder.encodeObject_forKey_(state, kSTATE)
    else:
        coder.encodeValueOfObjCType_at_(objc._C_INT, kOP_REDUCE)
        # Non-keyed coding is purely positional; order must match decoding.
        for payload in (func, args, items_payload, dict_payload, state):
            coder.encodeObject_(payload)
def save_global(coder, obj, name=None):
    """Encode a reference to a module-level object (class/function) by name.

    Resolves the object's qualified name, verifies it re-imports to the
    same object, then writes either an extension-registry code or the
    (module, name) pair — keyed or non-keyed depending on the coder.

    Raises:
        PicklingError: if the object cannot be found at, or does not match,
            its advertised module/name location.
    """
    if name is None:
        # Prefer __qualname__ so nested classes/functions resolve correctly.
        name = getattr(obj, "__qualname__", None)
    if name is None:
        name = obj.__name__

    module_name = whichmodule(obj, name)
    try:
        module = import_module(module_name)
        obj2 = _getattribute(module, name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s"
            % (obj, module_name, name)
        )
    else:
        # Identity check: unpickling yields module.name, so it must be the
        # very object we were asked to pickle.
        if obj2 is not obj:
            raise PicklingError(
                "Can't pickle %r: it's not the same object as %s.%s"
                % (obj, module_name, name)
            )

    # copyreg extension registry allows encoding well-known globals as ints.
    code = copyreg._extension_registry.get((module_name, name))
    if coder.allowsKeyedCoding():
        if code:
            coder.encodeInt_forKey_(kOP_GLOBAL_EXT, kKIND)
            coder.encodeInt_forKey_(code, kCODE)
        else:
            coder.encodeInt_forKey_(kOP_GLOBAL, kKIND)
            # NOTE(review): `unicode` alongside `copyreg` suggests a py2/py3
            # compatibility shim is defined elsewhere — confirm it exists.
            coder.encodeObject_forKey_(unicode(module_name), kMODULE)
            coder.encodeObject_forKey_(unicode(name), kNAME)
    else:
        if code:
            coder.encodeValueOfObjCType_at_(objc._C_INT, kOP_GLOBAL_EXT)
            coder.encodeValueOfObjCType_at_(objc._C_INT, code)
        else:
            coder.encodeValueOfObjCType_at_(objc._C_INT, kOP_GLOBAL)
            coder.encodeObject_(unicode(module_name))
            coder.encodeObject_(unicode(name))
def pyobjectEncode(self, coder):
    """Encode an arbitrary Python object into *coder*.

    Dispatch order mirrors pickle.Pickler.save: builtin dispatch table,
    classes (encoded by reference), copyreg.dispatch_table, then the
    object's own __reduce_ex__.
    """
    cls = type(self)

    # Builtin support takes precedence.
    handler = encode_dispatch.get(cls)
    if handler is not None:
        handler(coder, self)
        return

    # Classes (including those with custom metaclasses) are saved by
    # reference.
    # NOTE: pickle.py catches TypeError here, that's for
    # compatibility with ancient versions of Boost
    # (before Python 2.2) and is not needed here.
    if issubclass(cls, type):
        save_global(coder, self)
        return

    # copyreg.dispatch_table overrides the object's own reduce protocol.
    reduce = copyreg.dispatch_table.get(cls)
    if reduce is not None:
        rv = reduce(self)
    else:
        reduce = getattr(self, "__reduce_ex__", None)
        rv = reduce(2)

    if type(rv) is str:
        save_global(coder, self, rv)
        return
    if type(rv) is not tuple:
        raise PicklingError("%s must return string or tuple" % reduce)
    if not (2 <= len(rv) <= 5):
        raise PicklingError(
            "Tuple returned by %s must have two to "
            "five elements" % reduce
        )
    save_reduce(coder, *rv)
def _on_queue_feeder_error(self, e, obj):
    """Handle a failure while feeding *obj* into the call queue.

    For `_CallItem`s, wrap the error as a PicklingError carrying the
    feeder-side traceback, fail the corresponding future, and wake the
    manager thread. Anything else goes to the parent queue's handler.
    """
    if isinstance(obj, _CallItem):
        # format traceback only on python3
        pickling_error = PicklingError(
            "Could not pickle the task to send it to the workers.")
        tb = traceback.format_exception(
            type(e), e, getattr(e, "__traceback__", None))
        # Attach the feeder-side traceback so it surfaces to the caller.
        pickling_error.__cause__ = _RemoteTraceback(
            '\n"""\n{}"""'.format(''.join(tb)))
        work_item = self.pending_work_items.pop(obj.work_id, None)
        self.running_work_items.remove(obj.work_id)
        # work_item can be None if another process terminated. In this
        # case, the queue_manager_thread fails all work_items with
        # BrokenProcessPool
        if work_item is not None:
            work_item.future.set_exception(pickling_error)
            del work_item
        self.thread_wakeup.wakeup()
    else:
        super()._on_queue_feeder_error(e, obj)
def _save_pretrained_fastai(
    learner,
    save_directory: str,
    config: Optional[Dict[str, Any]] = None,
):
    """
    Saves a fastai learner to `save_directory` in pickle format using the default pickle protocol for the version of python used.

    Args:
        learner (`Learner`):
            The `fastai.Learner` you'd like to save.
        save_directory (`str`):
            Specific directory in which you want to save the fastai learner.
        config (`dict`, *optional*):
            Configuration object. Will be uploaded as a .json file. Example: 'https://huggingface.co/espejelomar/fastai-pet-breeds-classification/blob/main/config.json'.

    <Tip>

    Raises the following error:

        - [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError)
          if the config file provided is not a dictionary.

    </Tip>
    """
    _check_fastai_fastcore_versions()

    os.makedirs(save_directory, exist_ok=True)

    # if the user provides config then we update it with the fastai and fastcore versions in CONFIG_TEMPLATE.
    if config is not None:
        if not isinstance(config, dict):
            raise RuntimeError(
                f"Provided config should be a dict. Got: '{type(config)}'")
        path = os.path.join(save_directory, CONFIG_NAME)
        with open(path, "w") as f:
            json.dump(config, f)

    _create_model_card(Path(save_directory))
    _create_model_pyproject(Path(save_directory))

    # learner.export saves the model in `self.path`.
    learner.path = Path(save_directory)
    # NOTE(review): duplicate makedirs — harmless (exist_ok), presumably
    # guarding against learner.path reassignment above.
    os.makedirs(save_directory, exist_ok=True)
    try:
        learner.export(
            fname="model.pkl",
            pickle_protocol=DEFAULT_PROTOCOL,
        )
    except PicklingError:
        # Most common cause: a lambda somewhere in the learner's callbacks
        # or transforms.
        raise PicklingError(
            "You are using a lambda function, i.e., an anonymous function. `pickle`"
            " cannot pickle function objects and requires that all functions have"
            " names. One possible solution is to name the function.")
def _on_queue_feeder_error(self, e, obj):
    """Handle a failure while feeding *obj* into the call queue.

    For `_CallItem`s, wrap the error as a PicklingError carrying the
    feeder-side traceback, fail the corresponding future, and wake the
    manager thread. Anything else goes to the parent queue's handler.
    """
    if isinstance(obj, _CallItem):
        # format traceback only on python3
        pickling_error = PicklingError(
            "Could not pickle the task to send it to the workers.")
        tb = traceback.format_exception(type(e), e,
                                        getattr(e, "__traceback__", None))
        # Attach the feeder-side traceback so it surfaces to the caller.
        pickling_error.__cause__ = _RemoteTraceback('\n"""\n{}"""'.format(
            ''.join(tb)))
        work_item = self.pending_work_items.pop(obj.work_id, None)
        self.running_work_items.remove(obj.work_id)
        # work_item can be None if another process terminated. In this
        # case, the queue_manager_thread fails all work_items with
        # BrokenProcessPool
        if work_item is not None:
            work_item.future.set_exception(pickling_error)
            del work_item
        self.thread_wakeup.wakeup()
    else:
        super()._on_queue_feeder_error(e, obj)
async def save_pers(self, pid):
    """Emit a persistent-id reference for *pid*.

    Binary protocols save the id on the stack followed by BINPERSID;
    protocol 0 writes a textual PERSID line, which requires the id to be
    ASCII-representable.
    """
    if not self.bin:
        # Protocol 0: text opcode; the id must encode as ASCII.
        try:
            encoded = str(pid).encode("ascii")
        except UnicodeEncodeError:
            raise PicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        await self.write(PERSID + encoded + b'\n')
        return
    await self.save(pid, save_persistent_id=False)
    await self.write(BINPERSID)
def __getstate__(self):
    """
    Our __getattr__ magic makes this necessary.

    Only loggers bound to the two standard streams can be represented
    by name; anything else refuses to pickle.
    """
    target = self._file
    if target is sys.stdout:
        return "stdout"
    if target is sys.stderr:
        return "stderr"
    raise PicklingError(
        "Only PrintLoggers to sys.stdout and sys.stderr can be pickled.")
async def dump(self, obj):
    """Write a pickled representation of obj to the open file."""
    # Check whether Pickler was initialized correctly. This is
    # only needed to mimic the behavior of _pickle.Pickler.dump().
    if not hasattr(self, "_file_write"):
        raise PicklingError(
            f"Pickler.__init__() was not called by "
            f"{self.__class__.__name__}.__init__()")
    if self.proto >= 2:
        # Protocol marker comes first for protocol 2+.
        await self.write(PROTO + pack("<B", self.proto))
    if self.proto >= 4:
        self.framer.start_framing()
    await self.save(obj)
    await self.write(STOP)
    self.framer.end_framing()
def save_package(self, path):
    """Pickle this Package object to *path*.

    Returns:
        True on success.

    Raises:
        PicklingError: if pickling the package fails (chained to the
            original error).
    """
    try:
        # `with` guarantees the file handle is closed even when pickling
        # raises (the original leaked the handle on failure). It also
        # removes the unreachable `saveSuccess = False` after the raise.
        with open(path, 'wb') as outfile:
            pickle.dump(self, outfile)
    except PicklingError as exc:
        raise PicklingError(
            "Pickle has failed to save a Package object to disk.") from exc
    return True
def pickle_dump(obj, file, protocol=0):
    """Like pickle.dump

    Raises:
        pickle.PicklingError
    """
    if protocol < 0 or protocol > 2:
        raise ValueError("Only protocol 0, 1, 2 allowed")
    try:
        return pickle.dump(obj, file, protocol)
    except PicklingError:
        # Already the right exception type; let it propagate untouched.
        raise
    except Exception as e:
        # Normalize any other failure to PicklingError for callers.
        raise PicklingError(e)
def dumps(obj, encoding="utf-8", errors="strict"):
    """
    Write a json representation of object as a bytes object

    Args:
        obj: object to represent
        encoding (str): encoding to use to encode bytes
        errors (str): same as encode 'errors' argument.

    Returns:
        object representation as a bytes object

    Raises:
        PicklingError: if the encoded object is not JSON-serializable.
    """
    try:
        serialized = json.dumps(encode(obj))
    except TypeError as e:
        raise PicklingError(str(e))
    return serialized.encode(encoding=encoding, errors=errors)
def dump(obj, file, **kwargs):
    """
    Write a json representation to the open file object

    Args:
        obj: object to represent
        file: open file object

    Raises:
        PicklingError: if the encoded object is not JSON-serializable.
    """
    # apply json defaults if not present
    kwargs.setdefault('indent', 4)
    kwargs.setdefault('separators', (',', ': '))
    try:
        json.dump(encode(obj), file, **kwargs)
    except TypeError as e:
        raise PicklingError(str(e))
def pickle_dumps(obj, protocol=0):
    """Like pickle.dumps

    Raises:
        pickle.PicklingError
    """
    if protocol < 0 or protocol > 2:
        raise ValueError("Only protocol 0, 1, 2 allowed")
    try:
        # pickle.PicklingError is not cPickle.PicklingError
        # so this makes sure we only raise pickle.PicklingError even if
        # we use cPickle
        return pickle.dumps(obj, protocol)
    except PicklingError:
        raise
    except Exception as e:
        # Normalize any other failure to PicklingError for callers.
        raise PicklingError(e)
def _send(self, message):
    """
    Wrap sendall to provide additional features: pickle the message,
    prefix it with a length header, and push the entire payload through
    the socket.

    :type message: BaseClientMessage
    """
    try:
        payload = pickle.dumps(message)
    except Exception:
        raise PicklingError(message)
    # Length header, newline-terminated, so the peer can frame the stream.
    header = "{}\n".format(len(payload)).encode()
    try:
        self.socket.sendall(header + payload)
    except Exception:
        raise
def __init__(self, function):
    """Record a named, importable function so it can be pickled by
    reference.

    Stores the function's name and defining module (resolving
    ``__main__`` to the script's file name), then verifies the function
    can be re-imported later.

    Raises:
        ValueError: if *function* is not callable or is a lambda.
        PicklingError: if the function cannot be found via import.
    """
    if not callable(function):
        raise ValueError("Not callable: %r" % function)
    self.name = function.__name__
    if self.name == '<lambda>':
        raise ValueError("Function cannot be a lambda.")
    self.modulename = function.__module__
    if self.modulename == "__main__":
        # Resolve the real module name from the script's source file so the
        # reference survives being unpickled in another process.
        import inspect, path
        modpath = path.path(inspect.getsourcefile(function))
        self.modulename = str(modpath.stripext().basename())
    try:
        # make sure we can find the function later
        self._import()
    except (KeyboardInterrupt, SystemExit):
        raise
    except BaseException as exc:
        # IMPROVED: was a bare `except:` that discarded the original
        # failure; chain it so the underlying import error stays visible.
        raise PicklingError("Can't pickle function %r" % function) from exc
def __getstate__(self):
    """Refuse pickling: the daemon must never leave its process."""
    raise PicklingError('no access to the daemon')