@contextmanager
def size_tracking(io_file):
    """Track the bytes written into a certain seekable I/O file.

    :param io_file: The I/O file being written inside the with statement.
    """
    # Local import to work around a circular import
    from dataclay.serialization.python.lang.IntegerWrapper import IntegerWrapper

    # Reserve a 32-bit placeholder, let the caller write the payload,
    # then seek back and patch the placeholder with the real payload size.
    start_track = io_file.tell()
    IntegerWrapper(32).write(io_file, 0)
    start_data = io_file.tell()
    yield
    end_data = io_file.tell()
    io_file.seek(start_track)
    IntegerWrapper(32).write(io_file, end_data - start_data)
    io_file.seek(end_data)
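# Usage sketch (illustrative, not part of the original module; assumes the
# wrapper import path above): size_tracking writes a 32-bit placeholder, lets
# the body write the payload, then back-patches the placeholder with the
# payload length.
from io import BytesIO
from dataclay.serialization.python.lang.IntegerWrapper import IntegerWrapper

buf = BytesIO()
with size_tracking(buf):
    buf.write(b"payload")

buf.seek(0)
assert IntegerWrapper(32).read(buf) == len(b"payload")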
def write(self, io_file, value):
    if self._nullable:
        if value is None:
            BooleanWrapper().write(io_file, False)
            return
        else:
            BooleanWrapper().write(io_file, True)

    if self._mode == "utf-8":
        ba = value.encode('utf-8')
    elif self._mode == "utf-16":
        ba = value.encode('utf-16-be')
    elif self._mode == "binary":
        if isinstance(value, BytesIO):
            ba = value.getvalue()
        else:
            if six.PY2:
                ba = bytes(value)
            elif six.PY3:
                ba = bytes(value, "utf-8")
    else:
        raise TypeError("Internal mode {} not recognized".format(self._mode))

    # Length-prefixed write: 32-bit size followed by the raw bytes
    IntegerWrapper(32).write(io_file, len(ba))
    io_file.write(ba)
def extract_reference_counting(self, io_bytes):
    io_file = BytesIO(io_bytes)
    io_file.seek(0)
    ref_counting_pos = IntegerWrapper().read(io_file)
    io_file.seek(ref_counting_pos)
    # Read up to the last byte
    ref_bytes = io_file.read()
    io_file.close()
    return ref_bytes
def serialize_reference_counting(self, dc_obj, io_file):
    """
    @postcondition: Serialize reference counting (garbage collector information)
    @param dc_obj: dc object with ref counting
    @param io_file: Buffer in which to serialize

    TODO: IMPORTANT: this should be removed in new serialization by using
    paddings to directly access reference counters inside metadata.
    """
    self.external_references = 0
    if dc_obj.get_alias() is not None and dc_obj.get_alias() != "":
        logger.trace("Found alias reference")
        self.external_references += 1

    cur_dataclay_id = getRuntime().get_dataclay_id()
    if dc_obj.get_replica_locations() is not None and len(dc_obj.get_replica_locations()) != 0:
        for replica_loc in dc_obj.get_replica_locations():
            replica_dataclay_id = getRuntime().get_execution_environment_info(
                replica_loc).dataclay_instance_id
            if replica_dataclay_id != cur_dataclay_id:
                logger.trace("Found federation reference")
                self.external_references += 1
                break

    logger.trace(f"Serializing reference counting external references = {self.external_references}")
    IntegerWrapper().write(io_file, self.external_references)
    IntegerWrapper().write(io_file, len(self.reference_counting))
    for location, ref_counting_in_loc in self.reference_counting.items():
        if location is None:
            BooleanWrapper().write(io_file, True)
        else:
            BooleanWrapper().write(io_file, False)
            StringWrapper().write(io_file, str(location))

        IntegerWrapper().write(io_file, len(ref_counting_in_loc))
        for oid, counter in ref_counting_in_loc.items():
            StringWrapper().write(io_file, str(oid))
            IntegerWrapper().write(io_file, counter)
def serialize_reference_counting(self, referrer_oid, io_file):
    """
    @postcondition: Serialize reference counting (garbage collector information)
    @param referrer_oid: ID of referrer object
    @param io_file: Buffer in which to serialize

    TODO: IMPORTANT: this should be removed in new serialization by using
    paddings to directly access reference counters inside metadata.
    """
    IntegerWrapper().write(io_file, len(self.reference_counting))
    for location, ref_counting_in_loc in self.reference_counting.items():
        if location is None:
            BooleanWrapper().write(io_file, True)
        else:
            BooleanWrapper().write(io_file, False)
            StringWrapper().write(io_file, str(location))

        IntegerWrapper().write(io_file, len(ref_counting_in_loc))
        for oid, counter in ref_counting_in_loc.items():
            StringWrapper().write(io_file, str(oid))
            IntegerWrapper().write(io_file, counter)
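# Illustrative reader for the layout written by the variant above (a sketch,
# not part of the original module; read_reference_counting is a hypothetical
# helper that simply mirrors the write order):
#
#   int32 number_of_locations
#   repeated per location:
#       bool   location_is_null
#       string location            (only if not null)
#       int32  number_of_entries
#       repeated per entry:
#           string oid
#           int32  counter
def read_reference_counting(io_file):
    reference_counting = {}
    for _ in range(IntegerWrapper().read(io_file)):
        if BooleanWrapper().read(io_file):
            location = None
        else:
            location = StringWrapper().read(io_file)
        entries = {}
        for _ in range(IntegerWrapper().read(io_file)):
            oid = StringWrapper().read(io_file)
            entries[oid] = IntegerWrapper().read(io_file)
        reference_counting[location] = entries
    return reference_counting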
def read(self, io_file):
    if self._nullable:
        is_not_null = BooleanWrapper().read(io_file)
        if not is_not_null:
            return None

    size = IntegerWrapper(32).read(io_file)
    ba = io_file.read(size)

    if self._mode == "utf-8":
        return ba.decode('utf-8')
    elif self._mode == "utf-16":
        return ba.decode('utf-16-be')
    elif self._mode == "binary":
        return ba
    else:
        raise TypeError("Internal mode {} not recognized".format(self._mode))
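# Round-trip sketch for the write()/read() pair above (assumes StringWrapper
# takes the mode as its first argument, as used elsewhere in this module):
from io import BytesIO

buf = BytesIO()
StringWrapper("utf-8").write(buf, "héllo")  # 32-bit length prefix + UTF-8 bytes
buf.seek(0)
assert StringWrapper("utf-8").read(buf) == "héllo"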
def read(self, io_file):
    from dataclay.util.management.classmgr.Utils import serialization_types
    try:
        return serialization_types[self._signature].read(io_file)
    except KeyError:
        pass

    # NumPy arrays have their own special ultra-fast serialization
    if self._signature.startswith(self.NUMPY_SIGNATURE):
        import numpy as np
        # Ignore the field size, as numpy is self-contained in that matter
        _ = IntegerWrapper(32).read(io_file)
        return np.load(io_file, allow_pickle=False)

    # "anything" is also a special case, as are all its aliases
    if self._signature == self.ANYTHING_SIGNATURE or \
            self._signature == self.STORAGEOBJECT_SIGNATURE:
        field_size = IntegerWrapper(32).read(io_file)
        logger.debug("Deserializing DataClayObject from pickle")
        return pickle.loads(io_file.read(field_size))

    # Everything should be a Python type...
    if not self._signature.startswith(self.PYTHON_PREFIX):
        # ... except the fallbacks (mostly for subtypes like lists of persistent objects)
        # TODO: Check pickle fallback or ignore it completely
        field_size = IntegerWrapper(32).read(io_file)
        return pickle.loads(io_file.read(field_size))

    subtype = self._signature[len(self.PYTHON_PREFIX):]

    sequence_match = self.SEQUENCE_REGEX.match(subtype)
    mapping_match = self.MAPPING_REGEX.match(subtype)

    if sequence_match:
        gd = sequence_match.groupdict()
        logger.debug("Deserializing a Python Sequence with the following match: %s", gd)

        if gd["subtype"]:
            instances_type = PyTypeWildcardWrapper(gd["subtype"], pickle_fallback=True)
        else:
            # list without subtype information
            instances_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)

        ret = list()
        size = IntegerWrapper(32).read(io_file)
        logger.debug("### READ SIZE OF SEQUENCE MATCH: %i", size)

        for i in range(size):
            if BooleanWrapper().read(io_file):
                ret.append(instances_type.read(io_file))
            else:
                ret.append(None)

        if gd["base_type"] == "tuple":
            logger.debug("Returning deserialized Python tuple")
            return tuple(ret)
        else:
            logger.debug("Returning deserialized Python list")
            return ret

    elif mapping_match:
        gd = mapping_match.groupdict()
        logger.debug("Deserializing a Python mapping with the following match: %s", gd)

        if gd["keytype"] and gd["valuetype"]:
            key_type = PyTypeWildcardWrapper(gd["keytype"], pickle_fallback=True)
            value_type = PyTypeWildcardWrapper(gd["valuetype"], pickle_fallback=True)
        else:
            # dict without subtype information
            key_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)
            value_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)

        ret = dict()
        size = IntegerWrapper(32).read(io_file)

        for i in range(size):
            if BooleanWrapper().read(io_file):
                key = key_type.read(io_file)
            else:
                key = None

            if BooleanWrapper().read(io_file):
                ret[key] = value_type.read(io_file)
            else:
                ret[key] = None

        logger.debug("Returning deserialized Python map")
        return ret

    elif subtype == self.STR_SIGNATURE:
        if six.PY2:
            return StringWrapper('binary').read(io_file)
        elif six.PY3:
            return StringWrapper('utf-8').read(io_file)
    elif subtype == self.UNICODE_SIGNATURE:
        return StringWrapper('utf-16').read(io_file)
    else:
        raise NotImplementedError("Python types supported at the moment: "
                                  "list and mappings (but not `%s`), sorry" % subtype)
def write(self, io_file, value):
    value = safe_wait_if_compss_future(value)

    from dataclay.util.management.classmgr.Utils import serialization_types
    try:
        serialization_types[self._signature].write(io_file, value)
        return
    except KeyError:
        pass

    # NumPy arrays have their own special ultra-fast serialization
    if self._signature.startswith(self.NUMPY_SIGNATURE):
        import numpy as np
        with size_tracking(io_file):
            np.save(io_file, value)
        return

    # "anything" is also a special case, as are all its aliases
    if self._signature == self.ANYTHING_SIGNATURE or \
            self._signature == self.STORAGEOBJECT_SIGNATURE:
        s = pickle.dumps(value, protocol=-1)
        IntegerWrapper(32).write(io_file, len(s))
        io_file.write(s)
        return

    # Everything should be a Python type...
    if not self._signature.startswith(self.PYTHON_PREFIX):
        # ... except the fallbacks (mostly for subtypes like lists of persistent objects)
        # TODO: Check pickle fallback or ignore it completely
        s = pickle.dumps(value, protocol=-1)
        IntegerWrapper(32).write(io_file, len(s))
        io_file.write(s)
        return

    # Now everything must be a Python type
    assert self._signature.startswith(self.PYTHON_PREFIX), \
        "Signature for Python types is expected to start with " \
        "'python'. Found signature: %s" % self._signature

    subtype = self._signature[len(self.PYTHON_PREFIX):]

    sequence_match = self.SEQUENCE_REGEX.match(subtype)
    mapping_match = self.MAPPING_REGEX.match(subtype)

    if sequence_match:
        gd = sequence_match.groupdict()
        logger.debug("Serializing a Python Sequence with the following match: %s", gd)

        if gd["subtype"]:
            instances_type = PyTypeWildcardWrapper(gd["subtype"], pickle_fallback=True)
        else:
            # list without subtype information
            instances_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)

        IntegerWrapper(32).write(io_file, len(value))
        for elem in value:
            if elem is None:
                BooleanWrapper().write(io_file, False)
            else:
                BooleanWrapper().write(io_file, True)
                instances_type.write(io_file, elem)

    elif mapping_match:
        gd = mapping_match.groupdict()
        logger.debug("Serializing a Python Mapping with the following match: %s", gd)

        if gd["keytype"] and gd["valuetype"]:
            key_type = PyTypeWildcardWrapper(gd["keytype"], pickle_fallback=True)
            value_type = PyTypeWildcardWrapper(gd["valuetype"], pickle_fallback=True)
        else:
            # dict without subtype information
            key_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)
            value_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)

        IntegerWrapper(32).write(io_file, len(value))
        for k, v in value.items():
            if k is None:
                BooleanWrapper().write(io_file, False)
            else:
                BooleanWrapper().write(io_file, True)
                key_type.write(io_file, k)

            if v is None:
                BooleanWrapper().write(io_file, False)
            else:
                # TODO: remove this when COMPSs behaves correctly with compss_wait_on(dict_instance)
                v = safe_wait_if_compss_future(v)
                BooleanWrapper().write(io_file, True)
                value_type.write(io_file, v)

    elif subtype == self.STR_SIGNATURE:
        # Note: modes are mirrored w.r.t. read(); under StringWrapper's
        # implementation a PY3 str in 'binary' mode is encoded as UTF-8 bytes,
        # so the byte layout matches the 'utf-8' read path.
        if six.PY2:
            StringWrapper('utf-8').write(io_file, value)
        elif six.PY3:
            StringWrapper('binary').write(io_file, value)
    elif subtype == self.UNICODE_SIGNATURE:
        StringWrapper('utf-16').write(io_file, value)
    else:
        raise NotImplementedError("Python types supported at the moment: "
                                  "list and mappings (but not `%s`), sorry" % subtype)
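# Minimal sketch of the sequence wire layout that the read()/write() pair
# above agrees on: a 32-bit length, then one null flag per element followed
# by the element payload. The element wrapper here (IntegerWrapper(32)) is a
# hypothetical stand-in for whatever instances_type resolves to.
from io import BytesIO

buf = BytesIO()
values = [7, None, 42]
IntegerWrapper(32).write(buf, len(values))
for elem in values:
    if elem is None:
        BooleanWrapper().write(buf, False)
    else:
        BooleanWrapper().write(buf, True)
        IntegerWrapper(32).write(buf, elem)  # hypothetical int element wrapper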
def write(self, io_file, value):
    if value:
        IntegerWrapper(8).write(io_file, 0x01)
    else:
        IntegerWrapper(8).write(io_file, 0x00)
def read(self, io_file):
    val = IntegerWrapper(8).read(io_file)
    return val != 0
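# Round-trip sketch: BooleanWrapper stores a boolean as a single byte
# (0x01 / 0x00), so write() followed by read() restores the value.
from io import BytesIO

buf = BytesIO()
BooleanWrapper().write(buf, True)
buf.seek(0)
assert BooleanWrapper().read(buf) is True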
def deserialize(self, io_file, iface_bitmaps, metadata, cur_deserialized_python_objs):
    """Reciprocal to serialize."""
    logger.verbose("Deserializing object %s", str(self.get_object_id()))

    # Put slow debugging info inside here:
    #
    # NOTE: new implementation of ExecutionGateway assert is not needed and wrong
    # if logger.isEnabledFor(DEBUG):
    #     klass = self.__class__
    #     logger.debug("Deserializing instance %r from class %s",
    #                  self, klass.__name__)
    #     logger.debug("The previous class is from module %s, in file %s",
    #                  klass.__module__, inspect.getfile(klass))
    #     logger.debug("The class extradata is:\n%s", klass._dclay_class_extradata)
    #     assert klass._dclay_class_extradata == self._dclay_class_extradata
    #
    # LOADED FLAG = TRUE only once deserialization is finished to avoid concurrent problems!
    # This may be due to race conditions. It may need some extra locking:
    # if self.__dclay_instance_extradata.loaded_flag:
    #     logger.debug("Loaded Flag is True")
    # else:
    #     self.__dclay_instance_extradata.loaded_flag = True

    # Reference counting: discard the padding (position of the ref-counting block)
    IntegerWrapper().read(io_file)

    # Deserialize master_location ("x" encodes None)
    des_master_loc_str = StringWrapper().read(io_file)
    if des_master_loc_str == "x":
        self.__dclay_instance_extradata.master_location = None
    else:
        self.__dclay_instance_extradata.master_location = UUID(des_master_loc_str)

    if hasattr(self, "__setstate__"):
        # The object has a user-defined deserialization method.
        # Use pickle, then delegate to that method.
        if six.PY2:
            import cPickle as pickle
        elif six.PY3:
            import _pickle as pickle

        state = pickle.loads(StringWrapper(mode="binary").read(io_file))
        self.__setstate__(state)

    else:
        # Regular dataClay-provided deserialization.
        # Start by getting the properties, sorted by position.
        properties = sorted(self.get_class_extradata().properties.values(),
                            key=attrgetter('position'))

        logger.trace("Tell io_file before loop: %s", io_file.tell())
        logger.verbose("Deserializing list of properties: %s", properties)

        for p in properties:
            logger.trace("Tell io_file in loop: %s", io_file.tell())
            not_null = BooleanWrapper().read(io_file)

            value = None
            if not_null:
                logger.debug("Not null property %s", p.name)
                if isinstance(p.type, UserType):
                    try:
                        logger.debug("Property %s is an association", p.name)
                        value = DeserializationLibUtilsSingleton.deserialize_association(
                            io_file, iface_bitmaps, metadata,
                            cur_deserialized_python_objs, getRuntime())
                    except KeyError:
                        logger.error('Failed to deserialize association', exc_info=True)
                else:
                    try:
                        upck = Unpickler(io_file)
                        upck.persistent_load = PersistentLoadPicklerHelper(
                            metadata, cur_deserialized_python_objs, getRuntime())
                        value = upck.load()
                    except Exception:
                        traceback.print_exc()

            logger.debug("Setting value %s for property %s", value, p.name)
            object.__setattr__(self, "%s%s" % (DCLAY_PROPERTY_PREFIX, p.name), value)

    # Reference counting bytes follow here.
    # TODO: discard these bytes?
def serialize(self, io_file, ignore_user_types, iface_bitmaps,
              cur_serialized_objs, pending_objs, reference_counting):
    # Reference counting information: the first integer is the position in
    # the buffer at which reference counting starts. This is done to avoid
    # "holding" unnecessary information during a store or update on disk.
    # In new serialization, this will be done through padding.
    # TODO: use padding instead once new serialization is implemented
    IntegerWrapper().write(io_file, 0)

    cur_master_loc = self.get_master_location()
    if cur_master_loc is not None:
        StringWrapper().write(io_file, str(cur_master_loc))
    else:
        StringWrapper().write(io_file, str("x"))

    if hasattr(self, "__getstate__"):
        # The object has a user-defined serialization method; use that.
        dco_extradata = self.__dclay_instance_extradata
        last_loaded_flag = dco_extradata.loaded_flag
        last_persistent_flag = dco_extradata.persistent_flag
        dco_extradata.loaded_flag = True
        dco_extradata.persistent_flag = False

        # Pickle the result of the user-defined serialization
        if six.PY2:
            import cPickle as pickle
        elif six.PY3:
            import _pickle as pickle

        state = pickle.dumps(self.__getstate__(), protocol=-1)

        # Restore the previous values (probably False & True)
        dco_extradata.loaded_flag = last_loaded_flag
        dco_extradata.persistent_flag = last_persistent_flag

        StringWrapper(mode="binary").write(io_file, state)

    else:
        # Regular dataClay-provided serialization.
        # Get the list of properties, making sure it is sorted.
        properties = sorted(self.get_class_extradata().properties.values(),
                            key=attrgetter('position'))

        logger.verbose("Serializing list of properties: %s", properties)

        for p in properties:
            try:
                value = object.__getattribute__(
                    self, "%s%s" % (DCLAY_PROPERTY_PREFIX, p.name))
            except AttributeError:
                value = None

            logger.verbose("Serializing property %s with value %s", p.name, value)

            if value is None:
                BooleanWrapper().write(io_file, False)
            else:
                if isinstance(p.type, UserType):
                    if not ignore_user_types:
                        BooleanWrapper().write(io_file, True)
                        SerializationLibUtilsSingleton.serialize_association(
                            io_file, value, cur_serialized_objs, pending_objs,
                            reference_counting)
                    else:
                        BooleanWrapper().write(io_file, False)
                else:
                    BooleanWrapper().write(io_file, True)
                    pck = Pickler(io_file, protocol=-1)
                    pck.persistent_id = PersistentIdPicklerHelper(
                        cur_serialized_objs, pending_objs, reference_counting)
                    pck.dump(value)

    # Reference counting
    # TODO: this should be removed in new serialization
    # (by using paddings to directly access reference counters inside metadata)
    cur_stream_pos = io_file.tell()
    io_file.seek(0)
    IntegerWrapper().write(io_file, cur_stream_pos)
    io_file.seek(cur_stream_pos)
    reference_counting.serialize_reference_counting(self.get_object_id(), io_file)
{{ c }}{% endfor %}
""")

stub_only_def = Template("""
@dclayEmptyMethod
def {{ func_name }}(
        self{% for param in param_names %}{% if loop.first %},{% endif %}
        {{ param }}{% if loop.last %}
{% endif %}{% else %}
{% endfor %}):
    raise NotImplementedError("Language Error: Method {{ func_name }} is not available for Python")
""")

# Note that the class_id of language types is null since "dataClay 2".
# Signatures follow JVM-style type descriptors (J=long, D=double, Z=boolean, V=void).
mapping_table = [
    (("int", int, IntegerWrapper(64)),
     Type(
         signature='J',
         includes=[],
     )),
    (("float", float, FloatWrapper(64)),
     Type(
         signature='D',
         includes=[],
     )),
    (("bool", bool, BooleanWrapper()),
     Type(
         signature='Z',
         includes=[],
     )),
    (("None", None, NullWrapper()),
     Type(
         signature='V',
         includes=[],
     )),
def extract_reference_counting(self, io_file):
    io_file.seek(0)
    ref_counting_pos = IntegerWrapper().read(io_file)
    io_file.seek(ref_counting_pos)
    # Read up to the last byte
    return io_file.read()
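# Sketch tying the two ends together (hypothetical buffer contents; the
# payload placeholders stand in for what serialize() actually writes): the
# integer back-patched at offset 0 by serialize() is exactly the position
# that extract_reference_counting() seeks to.
from io import BytesIO

buf = BytesIO()
IntegerWrapper().write(buf, 0)           # placeholder, as serialize() does
buf.write(b"...object payload...")
ref_pos = buf.tell()
buf.write(b"...ref counting bytes...")
end = buf.tell()
buf.seek(0)
IntegerWrapper().write(buf, ref_pos)     # back-patch the header
buf.seek(end)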