def run_remote(self, object_id, backend_id, operation_name, value):
    session_id = self.get_session_id()
    implementation_id = self.get_implementation_id(object_id, operation_name)

    try:
        execution_client = self.ready_clients[backend_id]
    except KeyError:
        exeenv = self.get_execution_environment_info(backend_id)
        execution_client = EEClient(exeenv.hostname, exeenv.port)
        self.ready_clients[backend_id] = execution_client

    operation = self.get_operation_info(object_id, operation_name)
    serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
        params=(value,),
        iface_bitmaps=None,
        params_spec=operation.params,
        params_order=operation.paramsOrder,
        hint_volatiles=None,
        runtime=self)

    ret = execution_client.ds_execute_implementation(
        object_id, implementation_id, session_id, serialized_params)

    if ret is not None:
        return DeserializationLibUtilsSingleton.deserialize_return(
            ret, None, operation.returnType, self)
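# --- Illustrative sketch, not from the original source ---
# The try/except KeyError above is a lazy client cache; the same pattern
# recurs in most methods of this runtime. A hypothetical helper that
# factors it out (`ready_clients`, `get_execution_environment_info` and
# `EEClient` are assumed from this module's context) could look like:
def _get_or_create_ee_client(self, backend_id):
    """Return a cached EEClient for backend_id, creating and caching it on a miss."""
    try:
        return self.ready_clients[backend_id]
    except KeyError:
        exeenv = self.get_execution_environment_info(backend_id)
        client = EEClient(exeenv.hostname, exeenv.port)
        self.ready_clients[backend_id] = client
        return client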
def update_object(self, into_object, from_object):
    session_id = self.get_session_id()
    backend_id = into_object.get_location()

    try:
        execution_client = self.ready_clients[backend_id]
    except KeyError:
        exeenv = self.get_execution_environment_info(backend_id)
        execution_client = EEClient(exeenv.hostname, exeenv.port)
        self.ready_clients[backend_id] = execution_client

    # We serialize objects like volatile parameters
    parameters = list()
    parameters.append(from_object)
    # TODO: modify serialize_params_or_return to not require this
    params_order = list()
    params_order.append("object")
    params_spec = dict()
    params_spec["object"] = "DataClayObject"  # not used, see serialize_params_or_return

    serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
        params=parameters,
        iface_bitmaps=None,
        params_spec=params_spec,
        params_order=params_order,
        hint_volatiles=backend_id,
        runtime=self,
        recursive=True,
        for_update=True)

    vol_objects = serialized_params.vol_objs
    if vol_objects is not None:
        new_ids = dict()

        for tag in vol_objects:
            cur_oid = serialized_params.vol_objs[tag].object_id
            if cur_oid not in new_ids:
                if cur_oid == from_object.get_object_id():
                    new_ids[cur_oid] = into_object.get_object_id()
                else:
                    new_ids[cur_oid] = uuid.uuid4()

            serialized_params.vol_objs[tag] = ObjectWithDataParamOrReturn(
                new_ids[cur_oid],
                serialized_params.vol_objs[tag].class_id,
                serialized_params.vol_objs[tag].metadata,
                serialized_params.vol_objs[tag].obj_bytes)

        for vol_tag in vol_objects:
            oids = serialized_params.vol_objs[vol_tag].metadata.tags_to_oids
            for tag, oid in oids.items():
                if oid in new_ids:
                    try:
                        serialized_params.vol_objs[vol_tag].metadata.tags_to_oids[tag] = new_ids[oid]
                    except KeyError:
                        pass

    execution_client.ds_update_object(
        session_id, into_object.get_object_id(), serialized_params)
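# --- Illustrative sketch, not from the original source ---
# update_object() rewrites object ids so the serialized copy of
# `from_object` lands on `into_object`'s id, while every other volatile in
# the graph gets a fresh UUID. A minimal standalone model of that remapping:
import uuid

def remap_ids(vol_oids, from_oid, into_oid):
    """Map from_oid -> into_oid, and every other oid -> a fresh uuid4."""
    new_ids = {}
    for oid in vol_oids:
        if oid not in new_ids:
            new_ids[oid] = into_oid if oid == from_oid else uuid.uuid4()
    return new_ids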
def gc_collect_internal(self, object_to_update):
    """
    @postcondition: Update object in db or store it if volatile (and register in LM)
    @param object_to_update: object to update
    """
    try:
        self.logger.debug("[==GCUpdate==] Updating object %s",
                          object_to_update.get_object_id())
        # Call EE update
        if object_to_update.is_pending_to_register():
            self.logger.debug(
                f"[==GCUpdate==] Storing and registering object {object_to_update.get_object_id()}")
            obj_bytes = SerializationLibUtilsSingleton.serialize_for_db_gc(
                object_to_update, False, None)
            self.exec_env.register_and_store_pending(object_to_update, obj_bytes, True)
        elif object_to_update.is_dirty():
            self.logger.debug("[==GCUpdate==] Updating dirty object %s",
                              object_to_update.get_object_id())
            obj_bytes = SerializationLibUtilsSingleton.serialize_for_db_gc(
                object_to_update, False, None)
            self.runtime.update_to_sl(object_to_update.get_object_id(), obj_bytes, True)
        else:
            # Object is not dirty: only its reference counting may need updating.
            # TODO: how to check if GlobalGC is enabled?
            self.logger.debug(
                "[==GCUpdate==] Going to update reference counting in database for object with ID %s",
                object_to_update.get_object_id())
            obj_bytes = SerializationLibUtilsSingleton.serialize_for_db_gc_not_dirty(
                object_to_update, False, None, False)
            if obj_bytes is not None:
                ref_counting_bytes = DeserializationLibUtilsSingleton.extract_reference_counting(
                    obj_bytes)
                self.runtime.update_to_sl(object_to_update.get_object_id(),
                                          ref_counting_bytes, False)
            else:
                self.logger.debug(
                    "[==GCUpdate==] Object %s is not dirty and has no references. Not going to SL",
                    object_to_update.get_object_id())
    except Exception:
        # Do nothing besides logging: the GC must not die on a failed update
        traceback.print_exc()
    # TODO: set datasetid for GC if set by user
def synchronize(self, instance, operation_name, params):
    session_id = self.get_session_id()
    object_id = instance.get_object_id()
    operation = self.get_operation_info(instance.get_object_id(), operation_name)
    implementation_id = self.get_implementation_id(instance.get_object_id(), operation_name)

    # === SERIALIZE PARAMETERS ===
    serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
        params=[params],
        iface_bitmaps=None,
        params_spec=operation.params,
        params_order=operation.paramsOrder,
        hint_volatiles=instance.get_hint(),
        runtime=self)

    self.execution_environment.synchronize(
        session_id, object_id, implementation_id, serialized_params)
def synchronize_federated(self, instance, params, operation_name, dc_info_id):
    self.logger.debug("Calling external dataClay to run %s operation in object %s",
                      operation_name, instance.get_object_id())
    operation = self.get_operation_info(instance.get_object_id(), operation_name)
    implementation_id = self.get_implementation_id(instance.get_object_id(), operation_name)

    # === SERIALIZE PARAMETERS ===
    # Between dataClay instances (DC - DC), iface_bitmaps is always None
    serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
        params=params,
        iface_bitmaps=None,
        params_spec=operation.params,
        params_order=operation.paramsOrder,
        hint_volatiles=instance.get_hint(),
        runtime=self)

    dc_info = self.get_external_dataclay_info(dc_info_id)
    hosts = dc_info.hosts
    ports = dc_info.ports
    for i in range(len(hosts)):
        try:
            lm_client = self.get_lm_api(hosts[i], ports[i])
            self.logger.debug(
                "[==JUMP==] Request execution to external dataClay %s with host %s and port %s for object %s",
                dc_info_id, hosts[i], ports[i], instance.get_object_id())
            lm_client.synchronize_federated_object(
                self.get_dataclay_id(), instance.get_object_id(),
                implementation_id, serialized_params)
            # Stop after the first endpoint that succeeds
            break
        except Exception:
            traceback.print_exc()
            if i + 1 == len(hosts):
                raise RuntimeError(
                    "[dataClay] ERROR: Cannot connect to external dataClay with ID %s"
                    % str(dc_info_id))
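# --- Illustrative sketch, not from the original source ---
# The loop above is a failover pattern: try each endpoint of the external
# dataClay in order, and raise only when all of them have failed. A
# generic, self-contained version of the same shape:
def first_successful(endpoints, call):
    """Apply `call` to each (host, port) pair and return the first result;
    raise RuntimeError only when every endpoint has failed."""
    last_error = None
    for host, port in endpoints:
        try:
            return call(host, port)
        except Exception as e:  # assumption: any failure means "try the next one"
            last_error = e
    raise RuntimeError("all endpoints failed") from last_error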
def synchronize(self, instance, operation_name, params):
    session_id = self.get_session_id()
    object_id = instance.get_object_id()
    dest_backend_id = self.get_location(instance.get_object_id())
    operation = self.get_operation_info(instance.get_object_id(), operation_name)
    implementation_id = self.get_implementation_id(instance.get_object_id(), operation_name)

    # === SERIALIZE PARAMETER ===
    serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
        params=[params],
        iface_bitmaps=None,
        params_spec=operation.params,
        params_order=operation.paramsOrder,
        hint_volatiles=instance.get_hint(),
        runtime=self)

    try:
        execution_client = self.ready_clients[dest_backend_id]
    except KeyError:
        exeenv = self.get_execution_environment_info(dest_backend_id)
        execution_client = EEClient(exeenv.hostname, exeenv.port)
        self.ready_clients[dest_backend_id] = execution_client

    execution_client.synchronize(session_id, object_id, implementation_id, serialized_params)
def internal_store(self, instance, make_persistent=True):
    """Perform the storage (StoreObject call) for an instance.

    :param instance: The DataClayObject willing to be stored.
    :param make_persistent: Flag, True when DS_STORE_OBJECT should be called
        and False when DS_UPSERT_OBJECT is the method to be called.
    :return: A dictionary containing the classes for all stored objects.

    This function works for two main scenarios: the makePersistent one (in
    which the instance is not yet persistent) and the update (in which the
    instance is persistent).

    The return dictionary is the same in both cases, but note that the update
    should not use the provided instance for updating metadata to the LM.
    """
    logger.debug("Store procedure for instance %r", instance)
    client = self.ready_clients["@STORAGE"]

    pending_objs = [instance]
    stored_objects_classes = dict()
    serialized_objs = list()
    reg_infos = list()
    dataset_id = self.execution_environment.thread_local_info.dataset_id

    while pending_objs:
        current_obj = pending_objs.pop()
        # Lock and make sure it is loaded
        current_obj_id = current_obj.get_object_id()
        self.lock(current_obj_id)  # Avoid GC cleaning the object while storing it
        try:
            if not current_obj.is_loaded():
                current_obj = self.get_or_new_instance_from_db(current_obj_id, False)

            dcc_extradata = current_obj.get_class_extradata()
            object_id = current_obj.get_object_id()

            if make_persistent:
                # Ignore already persistent objects
                if current_obj.is_persistent():
                    continue

                infos = [object_id,
                         dcc_extradata.class_id,
                         self.execution_environment.thread_local_info.session_id,
                         dataset_id]
                reg_infos.append(infos)

            # This object will soon be persistent
            current_obj.set_persistent(True)
            current_obj.set_hint(settings.environment_id)
            # Just in case (should have been loaded already)
            logger.debug("Setting loaded to true from internal store for object %s",
                         str(object_id))
            current_obj.set_loaded(True)

            # First store, since other OIDs are recursively created while creating MetaData
            if not object_id:
                if not make_persistent:
                    raise DataClayException(
                        "Objects should never be uuid-less for non-make_persistent use cases")
                object_id = uuid.uuid4()
                current_obj.set_object_id(object_id)
                current_obj.set_dataset_id(dataset_id)

            logger.debug("Ready to make persistent object {%s} of class %s {%s}",
                         object_id, dcc_extradata.classname, dcc_extradata.class_id)

            stored_objects_classes[object_id] = dcc_extradata.class_id

            # If we are not in a make_persistent, the dataset_id hint is null (?)
            serialized_objs.append(
                SerializationLibUtilsSingleton.serialize_dcobj_with_data(
                    current_obj, pending_objs, False, None, self, False))
        finally:
            self.unlock(current_obj_id)

    if make_persistent:
        lm_client = self.ready_clients["@LM"]
        lm_client.register_objects(reg_infos, settings.environment_id,
                                   None, None, LANG_PYTHON)
        client.ds_store_objects(
            self.execution_environment.thread_local_info.session_id,
            serialized_objs, False, None)
    else:
        client.ds_upsert_objects(
            self.execution_environment.thread_local_info.session_id,
            serialized_objs)
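# --- Illustrative sketch, not from the original source ---
# internal_store() walks the object graph iteratively: a worklist is seeded
# with one instance, and serialization pushes newly discovered referenced
# objects back onto it. The traversal shape, stripped of dataClay specifics
# (the `serialize_one` callback stands in for serialize_dcobj_with_data):
def walk_object_graph(root, serialize_one):
    """Serialize `root` and everything reachable from it.
    `serialize_one(obj, pending)` must append newly found neighbors to `pending`."""
    pending, seen, out = [root], set(), []
    while pending:
        obj = pending.pop()
        if id(obj) in seen:  # sketch-level stand-in for the is_persistent() skip above
            continue
        seen.add(id(obj))
        out.append(serialize_one(obj, pending))
    return out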
def serialize(self, io_file, ignore_user_types, iface_bitmaps,
              cur_serialized_objs, pending_objs, reference_counting):
    # Reference counting information:
    # the first integer is the position in the buffer at which reference
    # counting starts. This is done to avoid "holding" unnecessary
    # information during a store or update in disk.
    # In the new serialization, this will be done through padding.
    # TODO: use padding instead once new serialization is implemented
    IntegerWrapper().write(io_file, 0)

    cur_master_loc = self.get_master_location()
    if cur_master_loc is not None:
        StringWrapper().write(io_file, str(cur_master_loc))
    else:
        StringWrapper().write(io_file, str("x"))

    if hasattr(self, "__getstate__"):
        # The object has a user-defined serialization method. Use that.
        dco_extradata = self.__dclay_instance_extradata
        last_loaded_flag = dco_extradata.loaded_flag
        last_persistent_flag = dco_extradata.persistent_flag
        dco_extradata.loaded_flag = True
        dco_extradata.persistent_flag = False

        # Use pickle on the result of the serialization
        if six.PY2:
            import cPickle as pickle
        elif six.PY3:
            import _pickle as pickle

        state = pickle.dumps(self.__getstate__(), protocol=-1)

        # Restore the previous values (probably False & True)
        dco_extradata.loaded_flag = last_loaded_flag
        dco_extradata.persistent_flag = last_persistent_flag

        StringWrapper(mode="binary").write(io_file, state)
    else:
        # Regular dataClay-provided serialization.
        # Get the list of properties, making sure it is sorted
        properties = sorted(self.get_class_extradata().properties.values(),
                            key=attrgetter('position'))

        logger.verbose("Serializing list of properties: %s", properties)

        for p in properties:
            try:
                value = object.__getattribute__(
                    self, "%s%s" % (DCLAY_PROPERTY_PREFIX, p.name))
            except AttributeError:
                value = None

            logger.verbose("Serializing property %s with value %s", p.name, value)

            if value is None:
                BooleanWrapper().write(io_file, False)
            else:
                if isinstance(p.type, UserType):
                    if not ignore_user_types:
                        BooleanWrapper().write(io_file, True)
                        SerializationLibUtilsSingleton.serialize_association(
                            io_file, value, cur_serialized_objs, pending_objs,
                            reference_counting)
                    else:
                        BooleanWrapper().write(io_file, False)
                else:
                    BooleanWrapper().write(io_file, True)
                    pck = Pickler(io_file, protocol=-1)
                    pck.persistent_id = PersistentIdPicklerHelper(
                        cur_serialized_objs, pending_objs, reference_counting)
                    pck.dump(value)

    # Reference counting
    # TODO: this should be removed in the new serialization
    # (by using padding to directly access reference counters inside metadata)
    cur_stream_pos = io_file.tell()
    io_file.seek(0)
    IntegerWrapper().write(io_file, cur_stream_pos)
    io_file.seek(cur_stream_pos)
    reference_counting.serialize_reference_counting(self.get_object_id(), io_file)
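# --- Illustrative sketch, not from the original source ---
# serialize() reserves an integer at the start of the stream, writes the
# payload, then seeks back to patch in the position where reference
# counting begins. The same trick with only the standard library (the
# wrapper classes above are replaced by struct calls):
import io
import struct

buf = io.BytesIO()
buf.write(struct.pack("!i", 0))                 # placeholder for the offset
buf.write(b"...object payload bytes...")        # properties, pickled values
ref_counting_pos = buf.tell()
buf.write(b"...reference counting bytes...")    # trailing section
buf.seek(0)
buf.write(struct.pack("!i", ref_counting_pos))  # patch the placeholder
buf.seek(ref_counting_pos)                      # restore the stream position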
def call_execute_to_ds(self, instance, parameters, operation_name, exeenv_id, using_hint):
    object_id = instance.get_object_id()
    operation = self.get_operation_info(object_id, operation_name)
    session_id = self.get_session_id()
    implementation_id = self.get_implementation_id(object_id, operation_name)

    # === SERIALIZE PARAMETERS ===
    serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
        params=parameters,
        iface_bitmaps=None,
        params_spec=operation.params,
        params_order=operation.paramsOrder,
        hint_volatiles=exeenv_id,
        runtime=self)

    if serialized_params is not None and serialized_params.vol_objs is not None:
        for param in serialized_params.vol_objs.values():
            self.volatile_parameters_being_send.add(param.object_id)

    # === EXECUTE ===
    max_retry = Configuration.MAX_EXECUTION_RETRIES
    num_misses = 0
    executed = False
    for _ in range(max_retry):
        try:
            self.logger.verbose("Obtaining API for remote execution in %s", exeenv_id)
            execution_client = self.ready_clients[exeenv_id]
        except KeyError:
            exeenv = self.get_execution_environment_info(exeenv_id)
            self.logger.debug(
                "Not found in cache ExecutionEnvironment {%s}! Starting it at %s:%d",
                exeenv_id, exeenv.hostname, exeenv.port)
            execution_client = EEClient(exeenv.hostname, exeenv.port)
            self.ready_clients[exeenv_id] = execution_client

        try:
            self.logger.verbose("Calling remote EE %s", exeenv_id)
            ret = execution_client.ds_execute_implementation(
                object_id, implementation_id, session_id, serialized_params)
            executed = True
            break
        except (DataClayException, RpcError) as dce:
            self.logger.warning("Execution resulted in an error, retrying...", exc_info=dce)

            is_race_condition = False
            if serialized_params is not None and serialized_params.persistent_refs is not None:
                for param in serialized_params.persistent_refs:
                    if param.object_id in self.volatile_parameters_being_send:
                        is_race_condition = True
                        break

            if not is_race_condition:
                num_misses += 1
                self.logger.debug("Exception dataclay during execution. Retrying...")
                self.logger.debug(str(dce))

                locations = self.get_from_heap(object_id).get_replica_locations()
                if locations is None or len(locations) == 0:
                    try:
                        locations = self.get_metadata(object_id).locations
                    except DataClayException:
                        locations = None

                if locations is None:
                    self.logger.warning(
                        "Execution failed and no metadata available. Cannot continue")
                    raise

                new_location = False
                for loc in locations:
                    self.logger.debug("Found location %s", str(loc))
                    if loc != exeenv_id:
                        exeenv_id = loc
                        self.logger.debug("Found different location %s", str(loc))
                        new_location = True
                        break

                if not new_location:
                    exeenv_id = next(iter(locations))
                if using_hint:
                    instance.set_hint(exeenv_id)
                self.logger.debug(
                    "[==Miss Jump==] MISS. The object %s was not in the exec.location %s. Retrying execution.",
                    instance.get_object_id(), str(exeenv_id))

    if serialized_params is not None and serialized_params.vol_objs is not None:
        for param in serialized_params.vol_objs.values():
            if num_misses > 0:
                # ===========================================================
                # If there was a miss, it means that the persistent object in
                # which we were executing was not in the chosen location. As
                # seen in the serialize-parameters call above, we provide the
                # execution environment as the hint to set on volatile
                # parameters. In the EE, before deserializing volatiles, we
                # check whether the persistent object on which to execute the
                # method is there; if not, the EE raises an exception.
                # Therefore, if there was a miss, we know that the hint we
                # set in volatile parameters is wrong, because they are going
                # to be deserialized/stored in the same location as the
                # object with the method to execute.
                # ===========================================================
                param_instance = self.get_from_heap(param.object_id)
                param_instance.set_hint(exeenv_id)
            self.volatile_parameters_being_send.remove(param.object_id)

    if not executed:
        raise RuntimeError(
            "[dataClay] ERROR: Trying to execute remotely object but something went wrong. "
            "Maybe the object is still not stored (in case of asynchronous makepersistent) and "
            "waiting time is not enough. Maybe the object does not exist anymore due to a remove. "
            "Or maybe an exception happened in the server and the call failed.")

    result = None
    if ret is None:
        self.logger.debug(f"Result of operation named {operation_name} received: None")
    else:
        self.logger.debug(
            f"Deserializing result of operation named {operation_name}, "
            f"return type is {operation.returnType.signature}")
        result = DeserializationLibUtilsSingleton.deserialize_return(
            ret, None, operation.returnType, self)
        self.logger.debug(
            f"Deserialization of result of operation named {operation_name} successfully finished.")
    return result
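# --- Illustrative sketch, not from the original source ---
# The execution loop above is a bounded retry where each failure may pick a
# different target location ("miss jump"). Its skeleton, independent of the
# dataClay types used above:
def retry_with_relocation(targets, attempt, max_retry):
    """Call `attempt(target)` up to max_retry times, jumping to a different
    candidate in `targets` after each failure."""
    target = targets[0]
    for _ in range(max_retry):
        try:
            return attempt(target)
        except Exception:
            # Pick a different candidate, mirroring the location jump above
            others = [t for t in targets if t != target]
            target = others[0] if others else target
    raise RuntimeError("execution failed after %d retries" % max_retry)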
def make_persistent(self, instance, alias, backend_id, recursive):
    """ This method creates a new Persistent Object using the provided stub instance
    and, if indicated, all its associated objects as well. The Logic Module API is
    used for communication. This function is called from a stub/execution class.
    :param instance: Instance to make persistent
    :param backend_id: Indicates which is the destination backend
    :param recursive: Indicates if make persistent is recursive
    :param alias: Alias for the object
    :returns: ID of the backend in which the object was persisted.
    :type instance: DataClayObject
    :type backend_id: DataClayID
    :type recursive: boolean
    :type alias: string
    :rtype: DataClayID
    :raises RuntimeError: if backend id is UNDEFINED_LOCAL.
    """
    self.logger.debug("Starting make persistent object for instance with id %s",
                      instance.get_object_id())

    if backend_id is UNDEFINED_LOCAL:
        # This is a commonruntime end-user pitfall;
        # @abarcelo thinks that it is a nice
        # (and exceptionally detailed) error
        raise RuntimeError("""
You are trying to use dataclay.api.LOCAL but either:
  - dataClay has not been initialized properly
  - LOCAL has been wrongly imported.

Be sure to use LOCAL with:

from dataclay import api

and reference it with `api.LOCAL`

Refusing the temptation to guess.""")

    location = instance.get_hint()
    if location is None:
        location = backend_id
        # Choose location if needed
        # If object is already persistent -> it must have a Hint (location = hint here)
        # If object is not persistent -> location is chosen (provided backend id or random, hash...).
        if location is None:
            location = self.choose_location(instance, alias)

    if not instance.is_persistent():
        if alias is not None:
            # Add a new alias to an object.
            # Use cases:
            # 1 - object was persisted without alias and not yet registered -> we need to register it with new alias.
            # 2 - object was persisted and it is already registered -> we only add a new alias
            # 3 - object was persisted with an alias and it must be already registered -> we add a new alias.
            # From client side, we cannot check if object is registered or not (we do not have isPendingToRegister like EE)
            # Therefore, we call LogicModule with all information for registration.
            reg_infos = list()
            reg_info = RegistrationInfo(
                instance.get_object_id(),
                instance.get_class_extradata().class_id,
                self.get_session_id(),
                instance.get_dataset_id(),
                alias)
            reg_infos.append(reg_info)
            new_object_ids = self.ready_clients["@LM"].register_objects(
                reg_infos, location, LANG_PYTHON)
            self.logger.debug(f"Received ids: {new_object_ids}")
            new_object_id = next(iter(new_object_ids))
            self.update_object_id(instance, new_object_id)
            self.alias_cache[alias] = (instance.get_object_id(),
                                       instance.get_class_extradata().class_id,
                                       location)

        # === MAKE PERSISTENT === #
        self.logger.debug("Instance with object ID %s being sent to EE",
                          instance.get_object_id())
        # Set the default master location
        instance.set_master_location(location)
        instance.set_alias(alias)

        # We serialize objects like volatile parameters
        parameters = list()
        parameters.append(instance)
        params_order = list()
        params_order.append("object")
        params_spec = dict()
        params_spec["object"] = "DataClayObject"  # not used, see serialize_params_or_return
        serialized_objs = SerializationLibUtilsSingleton.serialize_params_or_return(
            params=parameters,
            iface_bitmaps=None,
            params_spec=params_spec,
            params_order=params_order,
            hint_volatiles=location,
            runtime=self,
            recursive=recursive)

        # Avoid some race-conditions in communication
        # (make persistent + execute, where the execute arrives before).
        # TODO: fix volatiles under deserialization support for __setstate__ and __getstate__
        self.add_volatiles_under_deserialization(serialized_objs.vol_objs.values())

        # Get EE
        try:
            execution_client = self.ready_clients[location]
        except KeyError:
            exeenv = self.get_execution_environment_info(location)
            self.logger.debug(
                "Not found in cache ExecutionEnvironment {%s}! Starting it at %s:%d",
                location, exeenv.hostname, exeenv.port)
            execution_client = EEClient(exeenv.hostname, exeenv.port)
            self.ready_clients[location] = execution_client

        # Call EE
        self.logger.verbose("Calling make persistent to EE %s", location)
        execution_client.make_persistent(settings.current_session_id,
                                         serialized_objs.vol_objs.values())

        # Update the hint with the location, and return it
        instance.set_hint(location)

        # Remove volatiles under deserialization
        self.remove_volatiles_under_deserialization(serialized_objs.vol_objs.values())

        object_id = instance.get_object_id()
        locations = set()
        locations.add(location)
        metadata_info = MetaDataInfo(
            object_id, False, instance.get_dataset_id(),
            instance.get_class_extradata().class_id, locations, alias, None)
        self.metadata_cache[object_id] = metadata_info
    return location
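# --- Illustrative usage sketch, not from the original source ---
# From user code, this runtime method is typically reached through a
# DataClayObject stub rather than called directly. A hypothetical session
# (the `Person` class and its alias are assumptions for illustration):
#
#     from dataclay import api
#     api.init()
#     p = Person(name="Alice")
#     p.make_persistent(alias="alice", backend_id=api.LOCAL, recursive=True)
#     api.finish()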