예제 #1
0
def size_tracking(io_file):
    """Context-manager generator that records how many bytes the `with`
    body writes into *io_file*.

    A 32-bit placeholder is emitted first; after the body runs, the
    placeholder is overwritten with the number of bytes produced and the
    cursor is restored to the end of the written data.

    :param io_file: Seekable I/O file written inside the with statement.
    """
    # Local import to work around a circular import.
    from dataclay.serialization.python.lang.IntegerWrapper import IntegerWrapper

    length_field_pos = io_file.tell()
    # Reserve space for the 32-bit length field.
    IntegerWrapper(32).write(io_file, 0)
    payload_start = io_file.tell()
    yield
    payload_end = io_file.tell()
    # Rewind and patch the placeholder with the real payload size.
    io_file.seek(length_field_pos)
    IntegerWrapper(32).write(io_file, payload_end - payload_start)
    io_file.seek(payload_end)
예제 #2
0
    def write(self, io_file, value):
        """Serialize a string *value* into *io_file* using this wrapper's mode.

        Nullable wrappers prefix the payload with a boolean not-null flag.
        The payload itself is a 32-bit length followed by the encoded bytes.

        :param io_file: Binary buffer to write into.
        :param value: String (or BytesIO in binary mode) to serialize.
        :raises TypeError: If the wrapper was built with an unknown mode.
        """
        if self._nullable:
            # Boolean not-null flag precedes the payload.
            if value is None:
                BooleanWrapper().write(io_file, False)
                return
            BooleanWrapper().write(io_file, True)

        mode = self._mode
        if mode == "utf-8":
            payload = value.encode('utf-8')
        elif mode == "utf-16":
            payload = value.encode('utf-16-be')
        elif mode == "binary":
            if isinstance(value, BytesIO):
                payload = value.getvalue()
            elif six.PY2:
                payload = bytes(value)
            elif six.PY3:
                payload = bytes(value, "utf-8")
        else:
            raise TypeError("Internal mode {} not recognized".format(
                mode))

        IntegerWrapper(32).write(io_file, len(payload))
        io_file.write(payload)
예제 #3
0
 def extract_reference_counting(self, io_bytes):
     """Extract the reference-counting bytes from a serialized object.

     The first integer in the stream is the offset at which the
     reference-counting section starts; everything from that offset up
     to the end of the buffer is the reference-counting payload.

     :param io_bytes: Serialized object as a bytes-like value.
     :return: Raw reference-counting bytes.
     """
     # `with` guarantees the buffer is released even if a wrapper read
     # raises (the previous version leaked it on error).
     with BytesIO(io_bytes) as io_file:
         io_file.seek(0)
         ref_counting_pos = IntegerWrapper().read(io_file)
         io_file.seek(ref_counting_pos)
         # read up to last byte
         return io_file.read()
예제 #4
0
    def serialize_reference_counting(self, dc_obj, io_file):
        """Serialize reference counting (garbage-collector information).

        TODO: IMPORTANT: this should be removed in new serialization by
        using paddings to directly access reference counters inside
        metadata.

        :param dc_obj: dc object with ref counting (alias/replica info).
        :param io_file: Buffer in which to serialize.
        """
        self.external_references = 0
        # An alias keeps the object externally referenced.
        if dc_obj.get_alias() is not None and dc_obj.get_alias() != "":
            logger.trace("Found alias reference")
            self.external_references += 1

        cur_dataclay_id = getRuntime().get_dataclay_id()
        replica_locs = dc_obj.get_replica_locations()
        if replica_locs is not None and len(replica_locs) != 0:
            # A replica living in another dataClay instance counts as one
            # federation reference (at most one, hence the break).
            for replica_loc in replica_locs:
                ee_info = getRuntime().get_execution_environment_info(replica_loc)
                if ee_info.dataclay_instance_id != cur_dataclay_id:
                    logger.trace("Found federation reference")
                    self.external_references += 1
                    break

        logger.trace(
            f"Serializing reference counting external references = {self.external_references}"
        )
        IntegerWrapper().write(io_file, self.external_references)
        IntegerWrapper().write(io_file, len(self.reference_counting))
        for location, per_loc_counters in self.reference_counting.items():
            # A null location is encoded as a single boolean flag.
            if location is None:
                BooleanWrapper().write(io_file, True)
            else:
                BooleanWrapper().write(io_file, False)
                StringWrapper().write(io_file, str(location))

            IntegerWrapper().write(io_file, len(per_loc_counters))
            for object_id, counter in per_loc_counters.items():
                StringWrapper().write(io_file, str(object_id))
                IntegerWrapper().write(io_file, counter)
예제 #5
0
 def serialize_reference_counting(self, referrer_oid, io_file):
     """Serialize reference counting (garbage-collector information).

     TODO: IMPORTANT: this should be removed in new serialization by
     using paddings to directly access reference counters inside
     metadata.

     :param referrer_oid: ID of referrer object (currently not used by
         this method; kept for interface compatibility).
     :param io_file: Buffer in which to serialize.
     """
     IntegerWrapper().write(io_file, len(self.reference_counting))
     for location, per_loc_counters in self.reference_counting.items():
         # A null location is encoded as a single boolean flag.
         if location is None:
             BooleanWrapper().write(io_file, True)
         else:
             BooleanWrapper().write(io_file, False)
             StringWrapper().write(io_file, str(location))

         IntegerWrapper().write(io_file, len(per_loc_counters))
         for object_id, counter in per_loc_counters.items():
             StringWrapper().write(io_file, str(object_id))
             IntegerWrapper().write(io_file, counter)
예제 #6
0
    def read(self, io_file):
        """Deserialize a string from *io_file* using this wrapper's mode.

        Nullable wrappers expect a leading boolean not-null flag. The
        payload is a 32-bit length followed by that many raw bytes.

        :param io_file: Binary buffer to read from.
        :return: The decoded string, raw bytes in binary mode, or None.
        :raises TypeError: If the wrapper was built with an unknown mode.
        """
        if self._nullable:
            # A False flag means the serialized value was None.
            if not BooleanWrapper().read(io_file):
                return None

        length = IntegerWrapper(32).read(io_file)
        raw = io_file.read(length)

        mode = self._mode
        if mode == "utf-8":
            return raw.decode('utf-8')
        if mode == "utf-16":
            return raw.decode('utf-16-be')
        if mode == "binary":
            return raw
        raise TypeError("Internal mode {} not recognized".format(
            mode))
예제 #7
0
    def read(self, io_file):
        """Deserialize a value of this wildcard signature from *io_file*.

        Resolution order:
          1. Registered primitive wrappers (``serialization_types``).
          2. numpy arrays (``np.load``).
          3. "anything"/storage-object signatures (pickle).
          4. Non-Python signatures (pickle fallback).
          5. Python sequences, mappings and string subtypes.

        :param io_file: Seekable binary buffer positioned at the value.
        :return: The deserialized Python value.
        :raises NotImplementedError: For unsupported Python subtypes.
        """
        from dataclay.util.management.classmgr.Utils import serialization_types
        try:
            # Fast path: primitive signatures have a registered wrapper.
            return serialization_types[self._signature].read(io_file)
        except KeyError:
            pass

        # numpy have their own special ultra-fast serialization
        if self._signature.startswith(self.NUMPY_SIGNATURE):
            import numpy as np
            # Ignoring field size, as numpy is selfcontained in that matter
            _ = IntegerWrapper(32).read(io_file)
            return np.load(io_file, allow_pickle=False)

        # anything is also a special case, also all its alias
        if self._signature == self.ANYTHING_SIGNATURE or \
                self._signature == self.STORAGEOBJECT_SIGNATURE:
            field_size = IntegerWrapper(32).read(io_file)
            logger.debug("Deserializing DataClayObject from pickle")

            return pickle.loads(io_file.read(field_size))

        # Everything shoulda be a python type...
        if not self._signature.startswith(self.PYTHON_PREFIX):
            # ... except the fallbacks (mostly for subtypes like lists of persistent objects)
            # TODO: Check pickle fallback or ignore it completely
            field_size = IntegerWrapper(32).read(io_file)
            return pickle.loads(io_file.read(field_size))

        # Strip the language prefix to obtain the concrete Python subtype.
        subtype = self._signature[len(self.PYTHON_PREFIX):]

        sequence_match = self.SEQUENCE_REGEX.match(subtype)
        mapping_match = self.MAPPING_REGEX.match(subtype)

        if sequence_match:
            gd = sequence_match.groupdict()
            logger.debug("Deserializing a Python Sequence with the following match: %s", gd)

            if gd["subtype"]:
                instances_type = PyTypeWildcardWrapper(gd["subtype"], pickle_fallback=True)
            else:  # list without subtypes information
                instances_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)

            ret = list()
            size = IntegerWrapper(32).read(io_file)
            logger.debug("### READ SIZE OF SEQUENCE MATCH: %i", size)

            # Each element is preceded by a boolean not-null flag.
            for i in range(size):
                if BooleanWrapper().read(io_file):
                    ret.append(instances_type.read(io_file))
                else:
                    ret.append(None)

            if gd["base_type"] == "tuple":
                logger.debug("Returning deserialized Python tuple")
                return tuple(ret)
            else:
                logger.debug("Returning deserialized Python list")
                return ret

        elif mapping_match:
            gd = mapping_match.groupdict()
            logger.debug("Deserializing a Python mapping with the following match: %s", gd)

            if gd["keytype"] and gd["valuetype"]:
                key_type = PyTypeWildcardWrapper(gd["keytype"], pickle_fallback=True)
                value_type = PyTypeWildcardWrapper(gd["valuetype"], pickle_fallback=True)
            else:
                # dict without subtypes information
                key_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)
                value_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)

            ret = dict()
            size = IntegerWrapper(32).read(io_file)
            # Keys and values each carry their own boolean not-null flag.
            for i in range(size):
                if BooleanWrapper().read(io_file):
                    key = key_type.read(io_file)
                else:
                    key = None

                if BooleanWrapper().read(io_file):
                    ret[key] = value_type.read(io_file)
                else:
                    ret[key] = None
            logger.debug("Returning deserialized Python map")
            return ret

        elif subtype == self.STR_SIGNATURE:
            # PY2: 'binary' yields the raw bytes (a PY2 str); PY3: decode
            # UTF-8 — presumably matching the write side's encoding (the
            # binary writer encodes PY3 str as UTF-8); confirm wire format.
            if six.PY2:
                return StringWrapper('binary').read(io_file)
            elif six.PY3:
                return StringWrapper('utf-8').read(io_file)
        elif subtype == self.UNICODE_SIGNATURE:
            return StringWrapper('utf-16').read(io_file)
        else:
            raise NotImplementedError("Python types supported at the moment: "
                                      "list and mappings (but not `%s`), sorry" % subtype)
예제 #8
0
    def write(self, io_file, value):
        """Serialize *value* into *io_file* according to this signature.

        Resolution order mirrors ``read``: registered primitive wrappers,
        numpy arrays, "anything"/storage-object signatures (pickle),
        non-Python signatures (pickle fallback), then Python sequences,
        mappings and string subtypes.

        :param io_file: Seekable binary buffer to write into.
        :param value: Value to serialize; COMPSs futures are resolved first.
        :raises NotImplementedError: For unsupported Python subtypes.
        """
        # Resolve a possible COMPSs future into its real value first.
        value = safe_wait_if_compss_future(value)

        from dataclay.util.management.classmgr.Utils import serialization_types
        try:
            # Fast path: primitive signatures have a registered wrapper.
            serialization_types[self._signature].write(io_file, value)
            return
        except KeyError:
            pass

        # numpy have their own special ultra-fast serialization
        if self._signature.startswith(self.NUMPY_SIGNATURE):
            import numpy as np
            # size_tracking back-patches the byte count in front of the data.
            with size_tracking(io_file):
                np.save(io_file, value)
            return

        # anything is also a special case, also all its alias
        if self._signature == self.ANYTHING_SIGNATURE or \
                self._signature == self.STORAGEOBJECT_SIGNATURE:
            s = pickle.dumps(value, protocol=-1)
            IntegerWrapper(32).write(io_file, len(s))
            io_file.write(s)
            return

        # Everything shoulda be a python type...
        if not self._signature.startswith(self.PYTHON_PREFIX):
            # ... except the fallbacks (mostly for subtypes like lists of persistent objects)
            # TODO: Check pickle fallback or ignore it completely

            s = pickle.dumps(value, protocol=-1)
            IntegerWrapper(32).write(io_file, len(s))
            io_file.write(s)
            return

        # Now everything must be a python type
        # NOTE(review): this assert is redundant — the branch above already
        # returned for non-"python" signatures; kept as a defensive check.
        assert self._signature.startswith(self.PYTHON_PREFIX), \
            "Signature for Python types is expected to start with " \
            "'python'. Found signature: %s" % self._signature

        subtype = self._signature[len(self.PYTHON_PREFIX):]

        sequence_match = self.SEQUENCE_REGEX.match(subtype)
        mapping_match = self.MAPPING_REGEX.match(subtype)

        if sequence_match:
            gd = sequence_match.groupdict()
            logger.debug("Serializing a Python Sequence with the following match: %s", gd)

            if gd["subtype"]:
                instances_type = PyTypeWildcardWrapper(gd["subtype"], pickle_fallback=True)
            else:  # list without subtypes information
                instances_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)

            # Each element is preceded by a boolean not-null flag.
            IntegerWrapper(32).write(io_file, len(value))
            for elem in value:
                if elem is None:
                    BooleanWrapper().write(io_file, False)
                else:
                    BooleanWrapper().write(io_file, True)
                    instances_type.write(io_file, elem)

        elif mapping_match:
            gd = mapping_match.groupdict()
            logger.debug("Serializing a Python Mapping with the following match: %s", gd)

            if gd["keytype"] and gd["valuetype"]:
                key_type = PyTypeWildcardWrapper(gd["keytype"], pickle_fallback=True)
                value_type = PyTypeWildcardWrapper(gd["valuetype"], pickle_fallback=True)
            else:  # dict without subtypes information
                key_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)
                value_type = PyTypeWildcardWrapper(self.ANYTHING_SIGNATURE)

            IntegerWrapper(32).write(io_file, len(value))

            # Keys and values each carry their own boolean not-null flag.
            for k, v in value.items():
                if k is None:
                    BooleanWrapper().write(io_file, False)
                else:
                    BooleanWrapper().write(io_file, True)
                    key_type.write(io_file, k)

                if v is None:
                    BooleanWrapper().write(io_file, False)
                else:
                    # ToDo remove this when COMPSs behaves correctly with compss_wait_on(dict_instance)
                    v = safe_wait_if_compss_future(v)

                    BooleanWrapper().write(io_file, True)
                    value_type.write(io_file, v)

        elif subtype == self.STR_SIGNATURE:
            # PY2: encode str as UTF-8; PY3: 'binary' mode, whose writer
            # encodes a str argument as UTF-8 — presumably matching the
            # read side ('binary' on PY2, 'utf-8' on PY3); confirm.
            if six.PY2:
                StringWrapper('utf-8').write(io_file, value)
            elif six.PY3:
                StringWrapper('binary').write(io_file, value)
        elif subtype == self.UNICODE_SIGNATURE:
            StringWrapper('utf-16').write(io_file, value)
        else:
            raise NotImplementedError("Python types supported at the moment: "
                                      "list and mappings (but not `%s`), sorry" % subtype)
예제 #9
0
 def write(self, io_file, value):
     """Serialize *value* as a single byte: 0x01 if truthy, 0x00 otherwise.

     :param io_file: Binary buffer to write into.
     :param value: Value evaluated for truthiness.
     """
     IntegerWrapper(8).write(io_file, 0x01 if value else 0x00)
예제 #10
0
 def read(self, io_file):
     """Read one byte and map it to a boolean (non-zero means True).

     :param io_file: Binary buffer to read from.
     :return: True if the stored byte is non-zero, False otherwise.
     """
     return IntegerWrapper(8).read(io_file) != 0
예제 #11
0
    def deserialize(self, io_file, iface_bitmaps, metadata,
                    cur_deserialized_python_objs):
        """Reciprocal to serialize.

        Reads, in order: the reference-counting position integer (discarded
        here), the master location string ("x" encodes None), then either a
        pickled ``__setstate__`` payload or each property preceded by a
        boolean not-null flag.

        :param io_file: Buffer positioned at the start of the object data.
        :param iface_bitmaps: Interface bitmaps forwarded to association
            deserialization.
        :param metadata: Serialization metadata forwarded to helpers.
        :param cur_deserialized_python_objs: Map of already-deserialized
            objects, used when resolving associations.
        """
        logger.verbose("Deserializing object %s", str(self.get_object_id()))

        # Put slow debugging info inside here:
        #
        # NOTE: new implementation of ExecutionGateway assert is not needed and wrong
        # if logger.isEnabledFor(DEBUG):
        #     klass = self.__class__
        #     logger.debug("Deserializing instance %r from class %s",
        #                  self, klass.__name__)
        #     logger.debug("The previous class is from module %s, in file %s",
        #                  klass.__module__, inspect.getfile(klass))
        #     logger.debug("The class extradata is:\n%s", klass._dclay_class_extradata)
        #     assert klass._dclay_class_extradata == self._dclay_class_extradata
        #
        # LOADED FLAG = TRUE only once deserialization is finished to avoid concurrent problems!
        # # This may be due to race conditions. It may need to do some extra locking
        # if self.__dclay_instance_extradata.loaded_flag:
        #     logger.debug("Loaded Flag is True")
        # else:
        #     self.__dclay_instance_extradata.loaded_flag = True
        """ reference counting """
        """ discard padding """
        # The serializer wrote the ref-counting start position first; it is
        # not needed while deserializing, so the integer is discarded.
        IntegerWrapper().read(io_file)
        """ deserialize master_location """
        # "x" is the sentinel the serializer uses for a None master location.
        des_master_loc_str = StringWrapper().read(io_file)
        if des_master_loc_str == "x":
            self.__dclay_instance_extradata.master_location = None
        else:
            self.__dclay_instance_extradata.master_location = UUID(
                des_master_loc_str)

        if hasattr(self, "__setstate__"):
            # The object has a user-defined deserialization method.

            # Use pickle, and use that method instead
            if six.PY2:
                import cPickle as pickle
            elif six.PY3:
                import _pickle as pickle

            state = pickle.loads(StringWrapper(mode="binary").read(io_file))
            self.__setstate__(state)

        else:
            # Regular dataClay provided deserialization

            # Start by getting the properties
            properties = sorted(self.get_class_extradata().properties.values(),
                                key=attrgetter('position'))

            logger.trace("Tell io_file before loop: %s", io_file.tell())
            logger.verbose("Deserializing list of properties: %s", properties)

            for p in properties:

                logger.trace("Tell io_file in loop: %s", io_file.tell())
                # Each property is preceded by a boolean not-null flag.
                not_null = BooleanWrapper().read(io_file)
                value = None
                if not_null:
                    logger.debug("Not null property %s", p.name)
                    if isinstance(p.type, UserType):
                        try:
                            logger.debug("Property %s is an association",
                                         p.name)
                            value = DeserializationLibUtilsSingleton.deserialize_association(
                                io_file, iface_bitmaps, metadata,
                                cur_deserialized_python_objs, getRuntime())
                        except KeyError as e:
                            logger.error('Failed to deserialize association',
                                         exc_info=True)
                    else:
                        try:
                            upck = Unpickler(io_file)
                            upck.persistent_load = PersistentLoadPicklerHelper(
                                metadata, cur_deserialized_python_objs,
                                getRuntime())
                            value = upck.load()
                        # NOTE(review): bare except — any failure is printed
                        # and the property silently falls back to None.
                        except:
                            traceback.print_exc()

                logger.debug("Setting value %s for property %s", value, p.name)

                # Bypass any custom __setattr__ when storing the raw value.
                object.__setattr__(self,
                                   "%s%s" % (DCLAY_PROPERTY_PREFIX, p.name),
                                   value)
        """ reference counting bytes here """
        """ TODO: discard bytes? """
예제 #12
0
    def serialize(self, io_file, ignore_user_types, iface_bitmaps,
                  cur_serialized_objs, pending_objs, reference_counting):
        """Serialize this object into *io_file* (reciprocal of deserialize).

        Stream layout: a placeholder integer later back-patched with the
        position where reference counting starts, the master location
        string ("x" encodes None), then either a pickled ``__getstate__``
        payload or each property preceded by a boolean not-null flag, and
        finally the reference-counting section.

        :param io_file: Seekable buffer to write into.
        :param ignore_user_types: When True, association properties are
            written as null instead of being serialized.
        :param iface_bitmaps: Interface bitmaps (not referenced in this
            method body).
        :param cur_serialized_objs: Map of objects already serialized.
        :param pending_objs: Objects still pending serialization.
        :param reference_counting: Helper whose data is serialized at the
            end of the stream.
        """
        # Reference counting information
        # First integer represent the position in the buffer in which
        # reference counting starts. This is done to avoid "holding"
        # unnecessary information during a store or update in disk.

        # in new serialization, this will be done through padding
        # TODO: use padding instead once new serialization is implemented
        IntegerWrapper().write(io_file, 0)

        # "x" is the sentinel for a None master location.
        cur_master_loc = self.get_master_location()
        if cur_master_loc is not None:
            StringWrapper().write(io_file, str(cur_master_loc))
        else:
            StringWrapper().write(io_file, str("x"))

        if hasattr(self, "__getstate__"):
            # The object has a user-defined serialization method.
            # Use that
            # Temporarily mark the object loaded/non-persistent so
            # __getstate__ sees a fully materialized instance.
            dco_extradata = self.__dclay_instance_extradata
            last_loaded_flag = dco_extradata.loaded_flag
            last_persistent_flag = dco_extradata.persistent_flag
            dco_extradata.loaded_flag = True
            dco_extradata.persistent_flag = False

            # Use pickle to the result of the serialization
            if six.PY2:
                import cPickle as pickle
            elif six.PY3:
                import _pickle as pickle

            state = pickle.dumps(self.__getstate__(), protocol=-1)

            # Restore the previous flag values (probably False & True).
            dco_extradata.loaded_flag = last_loaded_flag
            dco_extradata.persistent_flag = last_persistent_flag

            StringWrapper(mode="binary").write(io_file, state)

        else:
            # Regular dataClay provided serialization
            # Get the list of properties, making sure it is sorted
            properties = sorted(self.get_class_extradata().properties.values(),
                                key=attrgetter('position'))

            logger.verbose("Serializing list of properties: %s", properties)

            for p in properties:

                try:
                    value = object.__getattribute__(
                        self, "%s%s" % (DCLAY_PROPERTY_PREFIX, p.name))
                except AttributeError:
                    value = None

                logger.verbose("Serializing property %s with value %s ",
                               p.name, value)

                # Each property is preceded by a boolean not-null flag.
                if value is None:
                    BooleanWrapper().write(io_file, False)
                else:
                    if isinstance(p.type, UserType):
                        if not ignore_user_types:
                            BooleanWrapper().write(io_file, True)
                            SerializationLibUtilsSingleton.serialize_association(
                                io_file, value, cur_serialized_objs,
                                pending_objs, reference_counting)
                        else:
                            BooleanWrapper().write(io_file, False)
                    else:
                        BooleanWrapper().write(io_file, True)
                        pck = Pickler(io_file, protocol=-1)
                        pck.persistent_id = PersistentIdPicklerHelper(
                            cur_serialized_objs, pending_objs,
                            reference_counting)
                        pck.dump(value)

        # Reference counting
        # TODO: this should be removed in new serialization
        # TODO: (by using paddings to directly access reference counters inside metadata)

        # Back-patch the leading integer with the position where reference
        # counting starts, then append the reference-counting section.
        # NOTE(review): the seek(0) assumes this object's data begins at
        # offset 0 of io_file — confirm against callers.
        cur_stream_pos = io_file.tell()
        io_file.seek(0)
        IntegerWrapper().write(io_file, cur_stream_pos)
        io_file.seek(cur_stream_pos)
        reference_counting.serialize_reference_counting(
            self.get_object_id(), io_file)
예제 #13
0
{{ c }}{% endfor %}
""")

# Jinja2 template for a stub-only method: the generated def carries the
# @dclayEmptyMethod decorator and raises NotImplementedError, marking a
# method that has no Python implementation.
stub_only_def = Template("""
    @dclayEmptyMethod
    def {{ func_name }}(
            self{% for param in param_names %}{% if loop.first %},{% endif %}
            {{ param }}{% if loop.last %}
    {% endif %}{% else %}
    {% endfor %}):
        raise NotImplementedError("Language Error: Method {{ func_name }} is not available for Python")
""")

# Note that the class_id of language types are null since "dataClay 2"
mapping_table = [
    (("int", int, IntegerWrapper(64)), Type(
        signature='J',
        includes=[],
    )),
    (("float", float, FloatWrapper(64)), Type(
        signature='D',
        includes=[],
    )),
    (("bool", bool, BooleanWrapper()), Type(
        signature='Z',
        includes=[],
    )),
    (("None", None, NullWrapper()), Type(
        signature='V',
        includes=[],
    )),
예제 #14
0
 def extract_reference_counting(self, io_file):
     """Return the reference-counting bytes of a serialized object stream.

     The first integer in the stream holds the offset where the
     reference-counting section begins; everything from that offset to
     the end of the stream is returned.

     :param io_file: Seekable binary buffer holding a serialized object.
     :return: Raw reference-counting bytes.
     """
     io_file.seek(0)
     # First integer = offset of the reference-counting section.
     start_offset = IntegerWrapper().read(io_file)
     io_file.seek(start_offset)
     # Consume everything up to the last byte.
     return io_file.read()