def getByID(identifier):
    """Retrieve the object stored under ``identifier`` in the Redis backend.

    Reads the raw serialized contents for the given key, deserializes them
    and returns the reconstructed object.

    :param identifier: Key of the object in the Redis database.
    :return: The deserialized object.
    :raise StorageException: If the backend holds no object for the key.
    """
    raw = redis_connection.get(identifier)
    # Redis returns None for a missing key: surface that as an explicit
    # storage error rather than handing None to the deserializer.
    if raw is None:
        error_message = 'ERROR: Redis backend has no object with id %s' % identifier
        raise StorageException(error_message)
    return deserialize_from_string(raw)
def getByID(*identifiers):
    """Retrieve a set of objects from their identifiers.

    All the LRANGE commands are stacked into a single Redis pipeline, so the
    whole batch is resolved in one round trip.

    :param identifiers: One or more object identifiers (Redis list keys).
    :return: The single object when one identifier is given, else the list
             of objects in the same order as the identifiers.
    """
    global redis_connection
    p = redis_connection.pipeline()
    # Stack one LRANGE per identifier. The (0, -1) range retrieves the whole
    # list in a single command, removing the previous per-identifier LLEN
    # round trip that defeated the purpose of pipelining.
    for identifier in identifiers:
        p.lrange(identifier, 0, -1)
    # Execute the pipeline: one network round trip for all the objects
    ret = p.execute()
    # Join and deserialize the serialized blocks of each object
    for i in range(len(identifiers)):
        ret[i] = deserialize_from_string(b''.join(ret[i]))
        ret[i].pycompss_mark_as_unmodified()
    return ret[0] if len(ret) == 1 else ret
def get_input_params(num_params, logger, args, process_name, persistent_storage):
    """Parse the flattened task parameters received by the worker.

    Each parameter is encoded in ``args`` as four consecutive entries
    (type, stream, prefix, value), plus extra entries for multi-chunk
    strings. When the Redis storage backend is active, PSCO retrievals are
    deferred and fetched in a single pipelined ``getByID`` call.

    :param num_params: Number of parameters encoded in args.
    :param logger: Logger instance.
    :param args: Flat list of command-line strings encoding the parameters.
    :param process_name: Worker process name (used in log messages).
    :param persistent_storage: Persistent storage boolean (unused here;
                               backend detection is done via storage.api).
    :return: Tuple (values, types, streams, prefixes).
    """
    pos = 0
    values = []
    types = []
    streams = []
    prefixes = []

    def is_redis():
        # Detect whether the active storage API is the Redis implementation.
        try:
            import storage.api
            return storage.api.__name__ == "redispycompss"
        except Exception:
            # Could not import storage api
            return False

    # Evaluate the backend detection once instead of re-attempting the
    # storage.api import for every single parameter.
    uses_redis = is_redis()
    # Pairs (identifier, index in values) whose retrieval is deferred so all
    # PSCOs can be fetched with one pipelined getByID call after the loop.
    # Defined unconditionally so the post-loop guard can never NameError.
    pre_pipeline = []
    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]
        if __debug__:
            logger.debug("[PYTHON WORKER %s] Parameter : %s" % (process_name, str(i)))
            logger.debug("[PYTHON WORKER %s] \t * Type : %s" % (process_name, str(pType)))
            logger.debug("[PYTHON WORKER %s] \t * Stream : %s" % (process_name, str(pStream)))
            logger.debug("[PYTHON WORKER %s] \t * Prefix : %s" % (process_name, str(pPrefix)))
            logger.debug("[PYTHON WORKER %s] \t * Value: %r" % (process_name, pValue))
        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)
        if pType == TYPE.FILE:
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            if uses_redis:
                # Defer the retrieval: keep the identifier as a placeholder
                # and replace it with the real object after the loop.
                po = pValue
                pre_pipeline.append((po, len(values)))
            else:
                # NOTE(review): other versions of this worker call
                # get_by_id; confirm get_by_ID is the name defined in scope.
                po = get_by_ID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            # Strings arrive split into space-separated, base64-encoded chunks.
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux.encode())
            if aux.decode() == EMPTY_STRING_KEY:
                # Then it is an empty string
                aux = ""
            else:
                #######
                # Check if the string is really an object
                # Required in order to recover objects passed as parameters.
                # - Option object_conversion
                real_value = aux
                try:
                    # try to recover the real object
                    aux = deserialize_from_string(aux)
                except (SerializerException, ValueError, EOFError):
                    # was not an object
                    aux = str(real_value.decode())
                #######
            values.append(aux)
            if __debug__:
                logger.debug("[PYTHON WORKER %s] \t * Final Value: %s" % (process_name, str(aux)))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            my_l = long(pValue)
            if my_l > JAVA_MAX_INT or my_l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                my_l = int(my_l)
            values.append(my_l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            values.append(pValue == 'true')
        # elif (pType == TYPE.OBJECT):
        #     pass
        else:
            logger.fatal("[PYTHON WORKER %s] Invalid type (%d) for parameter %d" % (process_name, pType, i))
            exit(1)
        pos += 4
    if uses_redis and pre_pipeline:
        ids = [ident for (ident, _) in pre_pipeline]
        from storage.api import getByID
        retrieved_objects = getByID(*ids)
        # getByID returns a bare object (not a list) for a single identifier
        if len(ids) == 1:
            retrieved_objects = [retrieved_objects]
        objindex = zip(retrieved_objects, [index for (_, index) in pre_pipeline])
        for (obj, index) in objindex:
            values[index] = obj
    return values, types, streams, prefixes
def compss_worker(persistent_storage):
    """
    Worker main method (invocated from __main__).

    Parses the task invocation encoded in sys.argv, deserializes every
    parameter, imports the user module (or, on ImportError, the user class)
    and executes the requested method, emitting tracing events when enabled.

    :param persistent_storage: Persistent storage boolean
    :return: None
    """
    logger = logging.getLogger('pycompss.worker.worker')
    logger.debug("Starting Worker")
    # Set the binding in worker mode
    set_pycompss_context('WORKER')
    args = sys.argv[6:]
    path = args[0]
    method_name = args[1]
    num_slaves = int(args[2])
    slaves = []
    # NOTE(review): slave names are read starting at args[2], which is the
    # slave count itself -- confirm against the invoker's argument layout.
    for i in range(2, 2 + num_slaves):
        slaves.append(args[i])
    arg_position = 3 + num_slaves
    args = args[arg_position:]
    cus = args[0]
    args = args[1:]
    has_target = args[0]
    return_type = args[1]
    num_returns = int(args[2])
    num_params = int(args[3])
    args = args[4:]
    # COMPSs keywords for tasks (ie: tracing, process name...)
    compss_kwargs = {
        'compss_tracing': tracing,
        'compss_process_name': "GAT",
        'compss_storage_conf': storage_conf,
        'compss_return_length': num_returns
    }
    values = []
    types = []
    streams = []
    prefixes = []
    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, PARAMETER_PROCESSING)
    if persistent_storage:
        from pycompss.util.persistent_storage import storage_task_context
    # Get all parameter values.  Each parameter is encoded as four
    # consecutive args entries (type, stream, prefix, value), plus extra
    # entries for multi-chunk strings.
    logger.debug("Processing parameters:")
    pos = 0
    for i in range(0, num_params):
        p_type = int(args[pos])
        p_stream = int(args[pos + 1])
        p_prefix = args[pos + 2]
        p_value = args[pos + 3]
        logger.debug("Parameter : " + str(i))
        logger.debug("\t * Type : " + str(p_type))
        logger.debug("\t * Stream : " + str(p_stream))
        logger.debug("\t * Prefix : " + str(p_prefix))
        logger.debug("\t * Value: " + str(p_value))
        types.append(p_type)
        streams.append(p_stream)
        prefixes.append(p_prefix)
        if p_type == TYPE.FILE:
            values.append(p_value)
        elif p_type == TYPE.EXTERNAL_PSCO:
            po = get_by_id(p_value)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif p_type == TYPE.STRING:
            # Strings arrive split into space-separated, base64-encoded chunks.
            num_substrings = int(p_value)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux.encode())
            if aux.decode() == EMPTY_STRING_KEY:
                # Then it is an empty string
                aux = ""
            else:
                #######
                # Check if the string is really an object
                # Required in order to recover objects passed as parameters.
                # - Option object_conversion
                real_value = aux
                try:
                    # try to recover the real object
                    if IS_PYTHON3:
                        # decode removes double backslash, and encode returns as binary
                        aux = deserialize_from_string(aux.decode(STR_ESCAPE).encode())
                    else:
                        # decode removes double backslash, and str casts the output
                        aux = deserialize_from_string(str(aux.decode(STR_ESCAPE)))
                except (SerializerException, ValueError, EOFError):
                    # was not an object
                    aux = str(real_value.decode())
                #######
            values.append(aux)
            logger.debug("\t * Final Value: " + str(aux))
            pos += num_substrings
        elif p_type == TYPE.INT:
            values.append(int(p_value))
        elif p_type == TYPE.LONG:
            my_l = long(p_value)
            if my_l > JAVA_MAX_INT or my_l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                my_l = int(my_l)
            values.append(my_l)
        elif p_type == TYPE.DOUBLE:
            values.append(float(p_value))
        elif p_type == TYPE.BOOLEAN:
            if p_value == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (p_type == TYPE.OBJECT):
        #     pass
        else:
            logger.fatal("Invalid type (%d) for parameter %d" % (p_type, i))
            exit(1)
        pos += 4
    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, LOGGING)
    if logger.isEnabledFor(logging.DEBUG):
        values_str = ''
        types_str = ''
        for v in values:
            values_str += "\t\t" + str(v) + "\n"
        for t in types:
            types_str += str(t) + " "
        logger.debug("RUN TASK with arguments\n" +
                     "\t- Path: " + path + "\n" +
                     "\t- Method/function name: " + method_name + "\n" +
                     "\t- Has target: " + has_target + "\n" +
                     "\t- # parameters: " + str(num_params) + "\n" +
                     "\t- Values:\n" + values_str +
                     "\t- COMPSs types: " + types_str)
    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, MODULES_IMPORT)
    try:
        # Try to import the module (for functions)
        logger.debug("Trying to import the user module.")
        if sys.version_info >= (2, 7):
            module = importlib.import_module(path)  # Python 2.7
            logger.debug("Module successfully loaded (Python version >= 2.7)")
        else:
            module = __import__(path, globals(), locals(), [path], -1)
            # NOTE(review): log message is missing its closing parenthesis.
            logger.debug("Module successfully loaded (Python version < 2.7")
        if persistent_storage:
            with storage_task_context(logger, values, config_file_path=storage_conf):
                getattr(module, method_name)(*values, compss_types=types, **compss_kwargs)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
        else:
            getattr(module, method_name)(*values, compss_types=types, **compss_kwargs)
            if tracing:
                pyextrae.eventandcounters(TASK_EVENTS, 0)
                pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
    # ==========================================================================
    except AttributeError:
        # Appears with functions that have not been well defined.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.exception("WORKER EXCEPTION - Attribute Error Exception")
        logger.exception(''.join(line for line in lines))
        logger.exception("Check that all parameters have been defined with " +
                         "an absolute import path (even if in the same file)")
        exit(1)
    # ==========================================================================
    except ImportError:
        logger.debug("Could not import the module. Reason: Method in class.")
        # Not the path of a module, it ends with a class name
        class_name = path.split('.')[-1]
        # NOTE(review): this assignment is immediately overwritten by the
        # if/else below; kept as-is to preserve the original code.
        module_name = '.'.join(path.split('.')[0:-1])
        if '.' in path:
            module_name = '.'.join(path.split('.')[0:-1])
        else:
            module_name = path
        module = __import__(module_name, fromlist=[class_name])
        klass = getattr(module, class_name)
        logger.debug("Method in class %s of module %s" % (class_name, module_name))
        if has_target == 'true':
            # Instance method: the callee object arrives serialized in the
            # last value (a file name).
            file_name = values.pop()
            logger.debug("Deserialize self from file.")
            obj = deserialize_from_file(file_name)
            logger.debug("Processing callee, a hidden object of %s in file %s" % (file_name, type(obj)))
            values.insert(0, obj)
            types.pop()
            types.insert(0, TYPE.OBJECT)
            if persistent_storage:
                with storage_task_context(logger, values, config_file_path=storage_conf):
                    getattr(klass, method_name)(*values, compss_types=types, **compss_kwargs)
                    if tracing:
                        pyextrae.eventandcounters(TASK_EVENTS, 0)
                        pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            else:
                getattr(klass, method_name)(*values, compss_types=types, **compss_kwargs)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            # Write the (possibly modified) callee back to its file so the
            # runtime can propagate the changes.
            logger.debug("Serializing self to file")
            logger.debug("Obj: " + str(obj))
            serialize_to_file(obj, file_name)
        else:
            # Class method - class is not included in values (e.g. values = [7])
            types.insert(0, None)  # class must be first type
            if persistent_storage:
                with storage_task_context(logger, values, config_file_path=storage_conf):
                    getattr(klass, method_name)(*values, compss_types=types, **compss_kwargs)
                    if tracing:
                        pyextrae.eventandcounters(TASK_EVENTS, 0)
                        pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            else:
                getattr(klass, method_name)(*values, compss_types=types, **compss_kwargs)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
    # ==========================================================================
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.exception("WORKER EXCEPTION")
        logger.exception(''.join(line for line in lines))
        exit(1)
def compss_worker(persistent_storage):
    """
    Worker main method (invocated from __main__).

    Legacy (Python-2 era) variant: parses the task invocation encoded in
    sys.argv, deserializes every parameter, imports the user module (or, on
    ImportError, the user class) and executes the requested method.

    :param persistent_storage: Persistent storage boolean
    :return: None
    """
    logger = logging.getLogger('pycompss.worker.worker')
    logger.debug("Starting Worker")
    args = sys.argv[6:]
    path = args[0]
    method_name = args[1]
    numSlaves = int(args[2])
    slaves = []
    # NOTE(review): slave names are read starting at args[2], which is the
    # slave count itself -- confirm against the invoker's argument layout.
    for i in range(2, 2 + numSlaves):
        slaves.append(args[i])
    argPosition = 3 + numSlaves
    args = args[argPosition:]
    cus = args[0]
    args = args[1:]
    has_target = args[0]
    return_type = args[1]
    num_params = int(args[2])
    args = args[3:]
    pos = 0
    values = []
    types = []
    streams = []
    prefixes = []
    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, PARAMETER_PROCESSING)
    if persistent_storage:
        from storage.api import getByID
        from storage.api import TaskContext
    # Get all parameter values.  Each parameter is encoded as four
    # consecutive args entries (type, stream, prefix, value), plus extra
    # entries for multi-chunk strings.
    logger.debug("Processing parameters:")
    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]
        logger.debug("Parameter : " + str(i))
        logger.debug("\t * Type : " + str(pType))
        logger.debug("\t * Stream : " + str(pStream))
        logger.debug("\t * Prefix : " + str(pPrefix))
        logger.debug("\t * Value: " + str(pValue))
        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)
        if pType == TYPE.FILE:
            '''
            # check if it is a persistent object
            # TODO: I find that it makes no sense to identify PSCOs this way
            # Why do not we simply check if the object of a subclass of the
            # storage_object?
            if 'getID' in dir(pValue) and pValue.getID() is not None:
                po = getByID(pValue.getID())
                values.append(po)
            else:
                values.append(pValue)
            '''
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            # NOTE(review): getByID is only imported when persistent_storage
            # is true; this branch would raise NameError otherwise.
            po = getByID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            # Strings arrive split into space-separated, base64-encoded chunks.
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux)
            #######
            # Check if the string is really an object
            # Required in order to recover objects passed as parameters.
            # - Option object_conversion
            real_value = aux
            try:
                # try to recover the real object
                aux = deserialize_from_string(aux.decode('string_escape'))
            except (SerializerException, ValueError, EOFError):
                # was not an object
                aux = real_value
            #######
            values.append(aux)
            logger.debug("\t * Final Value: " + str(aux))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            l = long(pValue)
            if l > JAVA_MAX_INT or l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                l = int(l)
            values.append(l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            if pValue == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (pType == TYPE.OBJECT):
        #     pass
        else:
            logger.fatal("Invalid type (%d) for parameter %d" % (pType, i))
            exit(1)
        pos += 4
    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, LOGGING)
    if logger.isEnabledFor(logging.DEBUG):
        values_str = ''
        types_str = ''
        for v in values:
            values_str += "\t\t" + str(v) + "\n"
        for t in types:
            types_str += str(t) + " "
        logger.debug("RUN TASK with arguments\n" +
                     "\t- Path: " + path + "\n" +
                     "\t- Method/function name: " + method_name + "\n" +
                     "\t- Has target: " + has_target + "\n" +
                     "\t- # parameters: " + str(num_params) + "\n" +
                     "\t- Values:\n" + values_str +
                     "\t- COMPSs types: " + types_str)
    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, MODULES_IMPORT)
    try:
        # Try to import the module (for functions)
        logger.debug("Trying to import the user module.")
        if sys.version_info >= (2, 7):
            module = importlib.import_module(path)  # Python 2.7
            logger.debug("Module successfully loaded (Python version >= 2.7)")
        else:
            module = __import__(path, globals(), locals(), [path], -1)
            # NOTE(review): log message is missing its closing parenthesis.
            logger.debug("Module successfully loaded (Python version < 2.7")
        if persistent_storage:
            with TaskContext(logger, values, config_file_path=storage_conf):
                getattr(module, method_name)(*values, compss_types=types, compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
        else:
            getattr(module, method_name)(*values, compss_types=types, compss_tracing=tracing)
            if tracing:
                pyextrae.eventandcounters(TASK_EVENTS, 0)
                pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
    # ==========================================================================
    except AttributeError:
        # Appears with functions that have not been well defined.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.exception("WORKER EXCEPTION - Attribute Error Exception")
        logger.exception(''.join(line for line in lines))
        logger.exception("Check that all parameters have been defined with " +
                         "an absolute import path (even if in the same file)")
        exit(1)
    # ==========================================================================
    except ImportError:
        logger.debug("Could not import the module. Reason: Method in class.")
        # Not the path of a module, it ends with a class name
        class_name = path.split('.')[-1]
        # NOTE(review): this assignment is immediately overwritten by the
        # if/else below; kept as-is to preserve the original code.
        module_name = '.'.join(path.split('.')[0:-1])
        if '.' in path:
            module_name = '.'.join(path.split('.')[0:-1])
        else:
            module_name = path
        module = __import__(module_name, fromlist=[class_name])
        klass = getattr(module, class_name)
        logger.debug("Method in class %s of module %s" % (class_name, module_name))
        if has_target == 'true':
            # Instance method: the callee object arrives serialized in the
            # last value (a file name).
            file_name = values.pop()
            logger.debug("Deserialize self from file.")
            obj = deserialize_from_file(file_name)
            logger.debug("Processing callee, a hidden object of %s in file %s" % (file_name, type(obj)))
            values.insert(0, obj)
            types.pop()
            types.insert(0, TYPE.OBJECT)
            if persistent_storage:
                with TaskContext(logger, values, config_file_path=storage_conf):
                    getattr(klass, method_name)(*values, compss_types=types, compss_tracing=tracing)
                    if tracing:
                        pyextrae.eventandcounters(TASK_EVENTS, 0)
                        pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            else:
                getattr(klass, method_name)(*values, compss_types=types, compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            # Write the (possibly modified) callee back to its file so the
            # runtime can propagate the changes.
            logger.debug("Serializing self to file")
            logger.debug("Obj: " + str(obj))
            serialize_to_file(obj, file_name)
        else:
            # Class method - class is not included in values (e.g. values = [7])
            types.insert(0, None)  # class must be first type
            if persistent_storage:
                with TaskContext(logger, values, config_file_path=storage_conf):
                    getattr(klass, method_name)(*values, compss_types=types, compss_tracing=tracing)
                    if tracing:
                        pyextrae.eventandcounters(TASK_EVENTS, 0)
                        pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            else:
                getattr(klass, method_name)(*values, compss_types=types, compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
    # ==========================================================================
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.exception("WORKER EXCEPTION")
        logger.exception(''.join(line for line in lines))
        exit(1)
def get_input_params(num_params, logger, args, process_name, persistent_storage):
    """Parse the flattened task parameters received by the worker process.

    Legacy (Python-2 era) variant.  Each parameter is encoded in ``args`` as
    four consecutive entries (type, stream, prefix, value), plus extra
    entries for multi-chunk strings.

    :param num_params: Number of parameters encoded in args.
    :param logger: Logger instance.
    :param args: Flat list of command-line strings encoding the parameters.
    :param process_name: Worker process name (used in log messages).
    :param persistent_storage: Whether a persistent storage backend is enabled.
    :return: Tuple (values, types, streams, prefixes).
    """
    if persistent_storage:
        from storage.api import getByID
        from storage.api import TaskContext
    pos = 0
    values = []
    types = []
    streams = []
    prefixes = []
    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("[PYTHON WORKER %s] Parameter : %s" % (process_name, str(i)))
            logger.debug("[PYTHON WORKER %s] \t * Type : %s" % (process_name, str(pType)))
            logger.debug("[PYTHON WORKER %s] \t * Stream : %s" % (process_name, str(pStream)))
            logger.debug("[PYTHON WORKER %s] \t * Prefix : %s" % (process_name, str(pPrefix)))
            logger.debug("[PYTHON WORKER %s] \t * Value: %r" % (process_name, pValue))
        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)
        if pType == TYPE.FILE:
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            # NOTE(review): getByID is only imported when persistent_storage
            # is true; this branch would raise NameError otherwise.
            po = getByID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            # Strings arrive split into space-separated, base64-encoded chunks.
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux)
            #######
            # Check if the string is really an object
            # Required in order to recover objects passed as parameters.
            # - Option object_conversion
            real_value = aux
            try:
                # try to recover the real object
                aux = deserialize_from_string(aux.decode('string_escape'))
            except (SerializerException, ValueError, EOFError):
                # was not an object
                aux = real_value
            #######
            values.append(aux)
            logger.debug("[PYTHON WORKER %s] \t * Final Value: %s" % (process_name, str(aux)))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            l = long(pValue)
            if l > JAVA_MAX_INT or l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                l = int(l)
            values.append(l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            if pValue == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (pType == TYPE.OBJECT):
        #     pass
        else:
            logger.fatal(
                "[PYTHON WORKER %s] Invalid type (%d) for parameter %d" % (process_name, pType, i))
            exit(1)
        pos += 4
    return values, types, streams, prefixes