def test_split_already_persistent(self):
    bn, bm = (2, 1)
    x = np.arange(100).reshape(10, -1)
    blocks = []
    for i in range(0, x.shape[0], bn):
        row = [x[i: i + bn, j: j + bm] for j in range(0, x.shape[1], bm)]
        blocks.append(row)
    data = StorageNumpy(input_array=x, name="test_split_already_persistent")
    data.sync()  # Flush values to cassandra
    for i, chunk in enumerate(data.np_split(block_size=(bn, bm))):
        storage_id = chunk.storage_id
        chunk.sync()  # Flush data
        del chunk
        chunk = getByID(storage_id)
        self.assertTrue(np.array_equal(list(chunk), blocks[i]))
    del data
    gc.collect()
    data = StorageNumpy(name="test_split_already_persistent")
    self.assertTrue(np.array_equal(list(data), x))
    for i, chunk in enumerate(data.np_split(block_size=(bn, bm))):
        storage_id = chunk.storage_id
        chunk.sync()  # Flush data
        del chunk
        chunk = getByID(storage_id)
        self.assertTrue(np.array_equal(list(chunk), blocks[i]))
    self.assertEqual(i + 1, len(blocks))
def reveal_objects(self,
                   args,
                   logger,
                   python_mpi=False,
                   collections_layouts=None):  # noqa
    # type: (tuple, logger, bool, list) -> None
    """ Get the objects from the args message.

    This function takes the arguments passed from the persistent worker
    and treats them to get the proper parameters for the user function.

    :param args: Arguments.
    :param logger: Logger (shadows the outer logger, since it is only used
                   in the worker to reveal the parameter objects).
    :param python_mpi: If the task is python MPI.
    :param collections_layouts: Layouts of collections params for python
                                MPI tasks.
    :return: None
    """
    if self.storage_supports_pipelining():
        if __debug__:
            logger.debug("The storage supports pipelining.")
        # Perform the pipelined getByID operation
        pscos = [x for x in args
                 if x.content_type == parameter.TYPE.EXTERNAL_PSCO]
        identifiers = [x.content for x in pscos]
        from storage.api import getByID  # noqa
        objects = getByID(*identifiers)
        # Just update each Parameter object with its retrieved content
        for (psco, content) in zip(pscos, objects):
            psco.content = content
    # Deal with all the parameters that are NOT returns
    for arg in [x for x in args
                if isinstance(x, Parameter) and not is_return(x.name)]:
        self.retrieve_content(arg, "", python_mpi, collections_layouts)
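# For context: the pipelined branch above relies on a storage backend whose
# getByID accepts several identifiers in a single call. The sketch below is a
# hypothetical minimal backend illustrating that contract; the in-memory
# registry is an assumption for illustration, not part of PyCOMPSs or any
# real storage API.
_registry = {}  # identifier -> persisted object (hypothetical)

def getByID(*identifiers):
    """Resolve one or more persistent identifiers in one round trip."""
    objects = [_registry[ident] for ident in identifiers]
    # A single identifier yields the bare object; several yield a list,
    # matching how the callers in this section consume the result.
    return objects[0] if len(objects) == 1 else objects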
def test_build_remotely_iterkeys_split_test(self):
    tablename = 'tab30'
    config.session.execute('DROP TABLE IF EXISTS my_app.' + tablename)
    config.session.execute('DROP TABLE IF EXISTS my_app.' + tablename + '_words')
    sto = TestSimple(tablename)
    pd = sto.words
    num_inserts = 1000
    what_should_be = set()
    for i in range(num_inserts):
        pd[i] = 'ciao' + str(i)
        what_should_be.add(i)
    del pd, sto
    count, = config.session.execute('SELECT count(*) FROM my_app.' + tablename + '_words')[0]
    self.assertEqual(count, num_inserts)
    sto = TestSimple(tablename)
    pd = sto.words
    count = 0
    res = set()
    splits = 0
    for partition in pd.split():
        id = partition.getID()
        from storage.api import getByID
        rebuild = getByID(id)
        splits += 1
        for val in rebuild.iterkeys():
            res.add(val)
            count += 1
    del pd
    self.assertTrue(splits >= config.number_of_partitions)
    self.assertEqual(count, num_inserts)
    self.assertEqual(what_should_be, res)
def test_parse_index_on(self):
    a = TestStorageIndexedArgsObj()
    self.assertEqual(a.test._indexed_args, ['x', 'y', 'z'])
    a.make_persistent('tparse.t1')
    from storage.api import getByID
    b = getByID(a.getID())
    self.assertEqual(b.test._indexed_args, ['x', 'y', 'z'])
def test_getByID_block(self):
    # ki = KeyIter('testspace', 'tt', 'app.words.Words', 'fake-id', ['position'])
    SO = Words('so')
    b = SO.split().next()
    new_block = getByID(b.getID())
    self.assertEqual(b.getID(), new_block.getID())
    self.assertEqual(b, new_block)
def test_remote_build_iterkeys_split_test(self):
    config.session.execute("DROP TABLE IF EXISTS my_app.tab_b0")
    config.session.execute(
        "CREATE TABLE IF NOT EXISTS my_app.tab_b0(position int, value text, PRIMARY KEY(position))"
    )
    tablename = "tab_b0"
    pd = StorageDict(tablename,
                     [('position', 'int')],
                     [('value', 'text')])
    num_inserts = 10000
    what_should_be = set()
    for i in range(num_inserts):
        pd[i] = 'ciao' + str(i)
        what_should_be.add(i)
    del pd
    count, = config.session.execute('SELECT count(*) FROM my_app.tab_b0')[0]
    self.assertEqual(count, num_inserts)
    pd = StorageDict(tablename,
                     [('position', 'int')],
                     [('value', 'text')])
    count = 0
    res = set()
    for partition in pd.split():
        id = partition.getID()
        from storage.api import getByID
        rebuild = getByID(id)
        for val in rebuild.iterkeys():
            res.add(val)
            count += 1
    self.assertEqual(count, num_inserts)
    self.assertEqual(what_should_be, res)
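# The split tests above share one round-trip pattern: split a persistent dict
# into partitions, ship each partition's ID, and rebuild the partition
# elsewhere with getByID. A hedged sketch of that pattern in isolation; `pd`
# stands for any persistent StorageDict and `process` is a hypothetical
# placeholder callback, not a name from the tests.
from storage.api import getByID

def consume_partitions(pd, process):
    for partition in pd.split():
        rebuilt = getByID(partition.getID())
        for key in rebuilt.iterkeys():  # Python 2 iterator API, as above
            process(key)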
def get_by_ID(id):
    """
    Retrieve the actual object from a persistent object identifier.

    :param id: Persistent object identifier
    :return: The object that corresponds to the id
    """
    return getByID(id)
def main():
    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    keys = r.keys()
    print("Number of keys: " + str(len(keys)))
    stapi.redis_connection = r
    for k in keys:
        v = stapi.getByID(k)
        print("KEY: " + str(k) + " VALUE: " + str(v))
def get_by_id(id):
    """
    Retrieve the actual object from a persistent object identifier.

    :param id: Persistent object identifier
    :return: The object that corresponds to the id
    """
    return getByID(id)
def get_by_id(identifier):
    # type: (str) -> object
    """ Retrieve the actual object from a persistent object identifier.

    :param identifier: Persistent object identifier.
    :return: The object that corresponds to the id.
    """
    return getByID(identifier)
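# Hedged usage sketch of the wrappers above: resolving a PSCO identifier that
# a worker received as a task parameter. The identifier value is made up.
psco_id = "00000000-0000-0000-0000-000000000000"  # hypothetical identifier
obj = get_by_id(psco_id)  # delegates to storage.api.getByID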
def test_can_be_rebuild(self):
    it = QbeastIterator([('partid', 'int'), ('time', 'float')],
                        [('x', 'float'), ('y', 'float'), ('z', 'float')],
                        "test.particle",
                        QbeastMeta('', [-.5, -.5, -.5], [3, 3, 3], 0.9))
    from storage.api import getByID
    it2 = getByID(it.getID())
    self.assertEqual(it.getID(), it2.getID())
def testTaskPersister(self):
    from pycompss.api.api import compss_wait_on as sync
    a = PSCO('Persisted in task')
    ID = psco_persister(a)
    ID = sync(ID)
    from storage.api import getByID
    an = getByID(ID)
    self.assertEqual('Persisted in task', an.get_content())
def class_type_test(self):
    base_dict = ApiTestSDict('api_sdict')
    # PyCOMPSs requires uuid of type str
    storage_id = str(base_dict.storage_id)
    del base_dict
    rebuild_dict = getByID(storage_id)
    self.assertTrue(isinstance(rebuild_dict, ApiTestSDict))
    rebuild_dict.delete_persistent()
def testPipeline(self):
    a = PSCO('a')
    b = PSCO('b')
    c = PSCO('c')
    a.make_persistent()
    b.make_persistent()
    c.make_persistent()
    from storage.api import getByID
    an, bn, cn = getByID(a.getID(), b.getID(), c.getID())
    self.assertEqual(a.get_content(), an.get_content())
    self.assertEqual(b.get_content(), bn.get_content())
    self.assertEqual(c.get_content(), cn.get_content())
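# testPipeline above exercises the batched form of getByID: several
# identifiers, one call, results returned in order. A self-contained sketch
# contrasting it with per-object retrieval; both helper names here are
# hypothetical, only PSCO's getID() and storage.api.getByID come from above.
def fetch_batched(pscos):
    from storage.api import getByID
    return getByID(*[p.getID() for p in pscos])  # one round trip total

def fetch_one_by_one(pscos):
    from storage.api import getByID
    return [getByID(p.getID()) for p in pscos]   # one round trip each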
def testTaskPersister_inout(self):
    from pycompss.api.api import compss_wait_on as sync
    a = PSCO('Persisted in task')
    newId = psco_persister_inout(a)
    b = sync(a)
    newId = sync(newId)
    self.assertEqual(a.getID(), None)
    self.assertNotEqual(b.getID(), None)
    self.assertNotEqual(a.getID(), b.getID())
    self.assertEqual(b.getID(), newId)
    from storage.api import getByID
    bn = getByID(newId)
    # Compare pairwise: assertEqual's third positional argument is the failure
    # message, so a single three-argument call would not check the retrieved copy.
    self.assertEqual(a.get_content(), b.get_content())
    self.assertEqual(b.get_content(), bn.get_content())
def test_nestedso_notpersistent(self):
    config.session.execute("DROP TABLE IF EXISTS my_app.mynewso")
    config.session.execute("DROP TABLE IF EXISTS my_app.myso")
    my_nested_so = Test3StorageObj()
    my_nested_so.myso.name = 'Link'
    self.assertEquals('Link', my_nested_so.myso.name)
    my_nested_so.myso.age = 10
    self.assertEquals(10, my_nested_so.myso.age)
    error = False
    try:
        config.session.execute('SELECT * FROM my_app.myso')
    except cassandra.InvalidRequest:
        error = True
    self.assertEquals(True, error)
    my_nested_so.myso2.test[0] = 'position0'
    self.assertEquals('position0', my_nested_so.myso2.test[0])
    my_nested_so2 = Test4StorageObj()
    my_nested_so2.myotherso.name = 'Link'
    self.assertEquals('Link', my_nested_so2.myotherso.name)
    my_nested_so2.myotherso.age = 10
    self.assertEquals(10, my_nested_so2.myotherso.age)
    error = False
    try:
        config.session.execute('SELECT * FROM my_app.myso')
    except cassandra.InvalidRequest:
        error = True
    self.assertEquals(True, error)
    my_nested_so3 = Test4bStorageObj('mynested')
    my_nested_subso = my_nested_so3.myotherso
    my_other_nested = getByID(my_nested_subso.getID())
    my_other_nested.name = 'bla'
    my_other_nested.age = 5
    error = False
    try:
        result = config.session.execute('SELECT * FROM my_app.mynested_myotherso')
    except cassandra.InvalidRequest:
        error = True
    self.assertEquals(False, error)
    for row in result:
        query_res = row
    self.assertEquals(5, query_res.age)
    self.assertEquals('bla', query_res.name)
def testCanBeRebuilt(self):
    config.session.execute("DROP TABLE IF EXISTS my_app.indexed_dict")
    config.session.execute(
        "DROP TABLE IF EXISTS my_app_qbeast.indexed_dict_indexed_dict_idx_d8tree"
    )
    d = TestIndexObj("my_app.indexed_dict")
    for i in range(0, 30):
        d[i, i + 1.0] = [i * 0.1 / 9.0, i * 0.2 / 9.0, i * 0.3 / 9.0]
    time.sleep(1)
    filtered = filter(
        lambda row: row.x > 0.02 and row.x < 0.25 and
                    row.y > 0.26 and row.y < 0.45 and
                    row.z > 0.58 and row.z < 0.9,
        d.items())
    from storage.api import getByID
    for partition in filtered.split():
        it2 = getByID(partition.storage_id)
        self.assertEqual(filtered._qbeast_random, it2._qbeast_random)
def test_split_by_columns(self):
    """ Tests iterating through the columns of the Hecuba array """
    bn, bm = (10, 1)
    x = np.arange(100).reshape(10, -1)
    blocks = []
    for i in range(0, x.shape[0], bn):
        row = [x[i: i + bn, j: j + bm] for j in range(0, x.shape[1], bm)]
        blocks.append(row)
    data = StorageNumpy(input_array=x, name="test_split_by_columns")
    data.sync()  # Flush values to cassandra
    for i, chunk in enumerate(data.np_split(block_size=(bn, bm))):
        storage_id = chunk.storage_id
        chunk.sync()  # Flush data
        del chunk
        chunk = getByID(storage_id)
        self.assertTrue(np.array_equal(list(chunk), blocks[i]))
    self.assertEqual(i + 1, len(blocks))
def test_build_remotely_keys_split_test(self):
    tablename = 'tab30'
    sto = TestSimple(tablename)
    pd = sto.words
    tbl_name = pd._table
    num_inserts = 1000
    what_should_be = set()
    for i in range(num_inserts):
        pd[i] = 'ciao' + str(i)
        what_should_be.add(i)
    del pd, sto
    gc.collect()
    count, = config.session.execute('SELECT count(*) FROM ' + self.ksp + '.{}'.format(tbl_name))[0]
    self.assertEqual(count, num_inserts)
    sto = TestSimple(tablename)
    pd = sto.words
    count = 0
    res = set()
    splits = 0
    for partition in pd.split():
        id = partition.storage_id
        from storage.api import getByID
        rebuild = getByID(id)
        splits += 1
        for val in rebuild.keys():
            res.add(val)
            count += 1
    pd.delete_persistent()
    del pd
    self.assertTrue(splits >= config.splits_per_node * N_CASS_NODES)
    self.assertEqual(count, num_inserts)
    self.assertEqual(what_should_be, res)
def get_input_params(num_params, logger, args, process_name, persistent_storage):
    pos = 0
    values = []
    types = []
    streams = []
    prefixes = []

    def is_redis():
        try:
            import storage.api
            return storage.api.__name__ == "redispycompss"
        except:  # Could not import storage api
            return False

    if is_redis():
        pre_pipeline = []
    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]
        if __debug__:
            logger.debug("[PYTHON WORKER %s] Parameter : %s" % (process_name, str(i)))
            logger.debug("[PYTHON WORKER %s] \t * Type : %s" % (process_name, str(pType)))
            logger.debug("[PYTHON WORKER %s] \t * Stream : %s" % (process_name, str(pStream)))
            logger.debug("[PYTHON WORKER %s] \t * Prefix : %s" % (process_name, str(pPrefix)))
            logger.debug("[PYTHON WORKER %s] \t * Value: %r" % (process_name, pValue))
        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)
        if pType == TYPE.FILE:
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            if is_redis():
                po = pValue
                pre_pipeline.append((po, len(values)))
            else:
                po = get_by_ID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux.encode())
            if aux.decode() == EMPTY_STRING_KEY:
                # Then it is an empty string
                aux = ""
            else:
                #######
                # Check if the string is really an object
                # Required in order to recover objects passed as parameters.
                # - Option object_conversion
                real_value = aux
                try:
                    # try to recover the real object
                    aux = deserialize_from_string(aux)
                except (SerializerException, ValueError, EOFError):
                    # was not an object
                    aux = str(real_value.decode())
                #######
            values.append(aux)
            if __debug__:
                logger.debug("[PYTHON WORKER %s] \t * Final Value: %s" % (process_name, str(aux)))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            my_l = long(pValue)
            if my_l > JAVA_MAX_INT or my_l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                my_l = int(my_l)
            values.append(my_l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            if pValue == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (pType == TYPE.OBJECT):
        #     pass
        else:
            logger.fatal("[PYTHON WORKER %s] Invalid type (%d) for parameter %d"
                         % (process_name, pType, i))
            exit(1)
        pos += 4
    if is_redis() and pre_pipeline:
        ids = [ident for (ident, _) in pre_pipeline]
        from storage.api import getByID
        retrieved_objects = getByID(*ids)
        if len(ids) == 1:
            retrieved_objects = [retrieved_objects]
        objindex = zip(retrieved_objects, [index for (_, index) in pre_pipeline])
        for (obj, index) in objindex:
            values[index] = obj
    return values, types, streams, prefixes
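# The redis branch above defers every EXTERNAL_PSCO lookup, remembering the
# identifier together with its slot in `values`, then resolves all of them
# with a single getByID call. A sketch of the pattern in isolation; the
# identifier strings and the sample `values` list are made up for
# illustration, only the getByID import comes from the code above.
from storage.api import getByID

values = ["some_file.txt", None, 42, None]
pre_pipeline = [("id-a", 1), ("id-b", 3)]  # (identifier, index) pairs

ids = [ident for (ident, _) in pre_pipeline]
retrieved = getByID(*ids)
if len(ids) == 1:
    retrieved = [retrieved]  # a single id returns the bare object
for obj, index in zip(retrieved, [index for (_, index) in pre_pipeline]):
    values[index] = obj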
def compss_worker(persistent_storage):
    """
    Worker main method (invoked from __main__).
    """
    logger = logging.getLogger('pycompss.worker.worker')
    logger.debug("Starting Worker")

    args = sys.argv[6:]
    path = args[0]
    method_name = args[1]
    numSlaves = int(args[2])
    slaves = []
    for i in range(2, 2 + numSlaves):
        slaves.append(args[i])
    argPosition = 3 + numSlaves

    args = args[argPosition:]
    cus = args[0]

    args = args[1:]
    has_target = args[0]
    return_type = args[1]
    num_params = int(args[2])

    args = args[3:]
    pos = 0

    values = []
    types = []
    streams = []
    prefixes = []

    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, PARAMETER_PROCESSING)

    if persistent_storage:
        from storage.api import getByID
        from storage.api import TaskContext

    # Get all parameter values
    logger.debug("Processing parameters:")
    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]

        logger.debug("Parameter : " + str(i))
        logger.debug("\t * Type : " + str(pType))
        logger.debug("\t * Stream : " + str(pStream))
        logger.debug("\t * Prefix : " + str(pPrefix))
        logger.debug("\t * Value: " + str(pValue))

        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)

        if pType == TYPE.FILE:
            '''
            # check if it is a persistent object
            # TODO: I find that it makes no sense to identify PSCOs this way
            # Why do not we simply check if the object of a subclass of the
            # storage_object?
            if 'getID' in dir(pValue) and pValue.getID() is not None:
                po = getByID(pValue.getID())
                values.append(po)
            else:
                values.append(pValue)
            '''
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            po = getByID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux)
            #######
            # Check if the string is really an object
            # Required in order to recover objects passed as parameters.
            # - Option object_conversion
            real_value = aux
            try:
                # try to recover the real object
                aux = deserialize_from_string(aux.decode('string_escape'))
            except (SerializerException, ValueError, EOFError):
                # was not an object
                aux = real_value
            #######
            values.append(aux)
            logger.debug("\t * Final Value: " + str(aux))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            l = long(pValue)
            if l > JAVA_MAX_INT or l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                l = int(l)
            values.append(l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            if pValue == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (pType == TYPE.OBJECT):
        #     pass
        else:
            logger.fatal("Invalid type (%d) for parameter %d" % (pType, i))
            exit(1)
        pos += 4

    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, LOGGING)

    if logger.isEnabledFor(logging.DEBUG):
        values_str = ''
        types_str = ''
        for v in values:
            values_str += "\t\t" + str(v) + "\n"
        for t in types:
            types_str += str(t) + " "
        logger.debug("RUN TASK with arguments\n" +
                     "\t- Path: " + path + "\n" +
                     "\t- Method/function name: " + method_name + "\n" +
                     "\t- Has target: " + has_target + "\n" +
                     "\t- # parameters: " + str(num_params) + "\n" +
                     "\t- Values:\n" + values_str +
                     "\t- COMPSs types: " + types_str)

    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, MODULES_IMPORT)

    try:
        # Try to import the module (for functions)
        logger.debug("Trying to import the user module.")
        if sys.version_info >= (2, 7):
            module = importlib.import_module(path)  # Python 2.7
            logger.debug("Module successfully loaded (Python version >= 2.7)")
        else:
            module = __import__(path, globals(), locals(), [path], -1)
            logger.debug("Module successfully loaded (Python version < 2.7)")

        if persistent_storage:
            with TaskContext(logger, values, config_file_path=storage_conf):
                getattr(module, method_name)(*values,
                                             compss_types=types,
                                             compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
        else:
            getattr(module, method_name)(*values,
                                         compss_types=types,
                                         compss_tracing=tracing)
            if tracing:
                pyextrae.eventandcounters(TASK_EVENTS, 0)
                pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
    # ==========================================================================
    except AttributeError:
        # Appears with functions that have not been well defined.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.exception("WORKER EXCEPTION - Attribute Error Exception")
        logger.exception(''.join(line for line in lines))
        logger.exception("Check that all parameters have been defined with " +
                         "an absolute import path (even if in the same file)")
        exit(1)
    # ==========================================================================
    except ImportError:
        logger.debug("Could not import the module. Reason: Method in class.")
        # Not the path of a module, it ends with a class name
        class_name = path.split('.')[-1]
        if '.' in path:
            module_name = '.'.join(path.split('.')[0:-1])
        else:
            module_name = path
        module = __import__(module_name, fromlist=[class_name])
        klass = getattr(module, class_name)
        logger.debug("Method in class %s of module %s" % (class_name, module_name))

        if has_target == 'true':
            # Instance method
            file_name = values.pop()
            logger.debug("Deserialize self from file.")
            obj = deserialize_from_file(file_name)
            logger.debug("Processing callee, a hidden object of %s in file %s"
                         % (type(obj), file_name))
            values.insert(0, obj)
            types.pop()
            types.insert(0, TYPE.OBJECT)

            if persistent_storage:
                with TaskContext(logger, values, config_file_path=storage_conf):
                    getattr(klass, method_name)(*values,
                                                compss_types=types,
                                                compss_tracing=tracing)
                    if tracing:
                        pyextrae.eventandcounters(TASK_EVENTS, 0)
                        pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            else:
                getattr(klass, method_name)(*values,
                                            compss_types=types,
                                            compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)

            logger.debug("Serializing self to file")
            logger.debug("Obj: " + str(obj))
            serialize_to_file(obj, file_name)
        else:
            # Class method - class is not included in values (e.g. values = [7])
            types.insert(0, None)  # class must be first type
            if persistent_storage:
                with TaskContext(logger, values, config_file_path=storage_conf):
                    getattr(klass, method_name)(*values,
                                                compss_types=types,
                                                compss_tracing=tracing)
                    if tracing:
                        pyextrae.eventandcounters(TASK_EVENTS, 0)
                        pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            else:
                getattr(klass, method_name)(*values,
                                            compss_types=types,
                                            compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
    # ==========================================================================
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.exception("WORKER EXCEPTION")
        logger.exception(''.join(line for line in lines))
        exit(1)
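# The worker above expects the storage API to provide, besides getByID, a
# TaskContext context manager that brackets the user task invocation. A
# minimal no-op sketch of that contract under the signature used above
# (logger, values, config_file_path); the method bodies are assumptions, a
# real backend would initialise and flush its storage session here.
class TaskContext(object):
    """No-op sketch of the storage TaskContext contract."""

    def __init__(self, logger, values, config_file_path=None):
        self.logger = logger
        self.values = values
        self.config_file_path = config_file_path

    def __enter__(self):
        self.logger.debug("Starting task context")
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self.logger.debug("Ending task context")
        return False  # never swallow task exceptions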
def get_input_params(num_params, logger, args, process_name, persistent_storage):
    if persistent_storage:
        from storage.api import getByID
        from storage.api import TaskContext
    pos = 0
    values = []
    types = []
    streams = []
    prefixes = []
    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("[PYTHON WORKER %s] Parameter : %s" % (process_name, str(i)))
            logger.debug("[PYTHON WORKER %s] \t * Type : %s" % (process_name, str(pType)))
            logger.debug("[PYTHON WORKER %s] \t * Stream : %s" % (process_name, str(pStream)))
            logger.debug("[PYTHON WORKER %s] \t * Prefix : %s" % (process_name, str(pPrefix)))
            logger.debug("[PYTHON WORKER %s] \t * Value: %r" % (process_name, pValue))
        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)
        if pType == TYPE.FILE:
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            po = getByID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux)
            #######
            # Check if the string is really an object
            # Required in order to recover objects passed as parameters.
            # - Option object_conversion
            real_value = aux
            try:
                # try to recover the real object
                aux = deserialize_from_string(aux.decode('string_escape'))
            except (SerializerException, ValueError, EOFError):
                # was not an object
                aux = real_value
            #######
            values.append(aux)
            logger.debug("[PYTHON WORKER %s] \t * Final Value: %s" % (process_name, str(aux)))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            l = long(pValue)
            if l > JAVA_MAX_INT or l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                l = int(l)
            values.append(l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            if pValue == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (pType == TYPE.OBJECT):
        #     pass
        else:
            logger.fatal(
                "[PYTHON WORKER %s] Invalid type (%d) for parameter %d"
                % (process_name, pType, i))
            exit(1)
        pos += 4
    return values, types, streams, prefixes
def test_getByID_storage_obj(self):
    b = Words('testspace.tt')
    new_block = getByID(b.getID())
    self.assertEqual(b, new_block)
def test_getByID_storage_obj(self):
    b = Words('tt')
    new_block = getByID(b.storage_id)
    self.assertEqual(b, new_block)
    b.delete_persistent()
def retrieve_content(self, argument, name_prefix,
                     python_mpi, collections_layouts,
                     depth=0):
    # type: (Parameter, str, bool, list, int) -> None
    """ Retrieve the content of a particular argument.

    :param argument: Argument.
    :param name_prefix: Name prefix.
    :param python_mpi: If the task is python MPI.
    :param collections_layouts: Layouts of collections params for python
                                MPI tasks.
    :param depth: Collection depth (0 if not a collection).
    :return: None
    """
    if __debug__:
        logger.debug("\t - Revealing: " + str(argument.name))
    # This case is special, as a FILE can actually mean a FILE or an
    # object that is serialized in a file
    if is_vararg(argument.name):
        self.param_varargs = argument.name
        if __debug__:
            logger.debug("\t\t - It is vararg")

    content_type = argument.content_type
    type_file = parameter.TYPE.FILE
    type_directory = parameter.TYPE.DIRECTORY
    type_external_stream = parameter.TYPE.EXTERNAL_STREAM
    type_collection = parameter.TYPE.COLLECTION
    type_external_psco = parameter.TYPE.EXTERNAL_PSCO

    if content_type == type_file:
        if self.is_parameter_an_object(argument.name):
            # The object is stored in some file, load and deserialize
            f_name = argument.file_name.split(':')[-1]
            if __debug__:
                logger.debug("\t\t - It is an OBJECT. Deserializing from file: " + str(f_name))  # noqa: E501
            argument.content = deserialize_from_file(f_name)
            if __debug__:
                logger.debug("\t\t - Deserialization finished")
        else:
            # The object is a FILE, just forward the path of the file
            # as a string parameter
            argument.content = argument.file_name.split(':')[-1]
            if __debug__:
                logger.debug("\t\t - It is FILE: " + str(argument.content))
    elif content_type == type_directory:
        if __debug__:
            logger.debug("\t\t - It is a DIRECTORY")
        argument.content = argument.file_name.split(":")[-1]
    elif content_type == type_external_stream:
        if __debug__:
            logger.debug("\t\t - It is an EXTERNAL STREAM")
        argument.content = deserialize_from_file(argument.file_name)
    elif content_type == type_collection:
        argument.content = []
        # This field is exclusive for COLLECTION_T parameters, so make
        # sure you have checked this parameter is a collection before
        # consulting it
        argument.collection_content = []
        col_f_name = argument.file_name.split(':')[-1]

        # maybe it is an inner-collection..
        _dec_arg = self.decorator_arguments.get(argument.name, None)
        _col_dir = _dec_arg.direction if _dec_arg else None
        _col_dep = _dec_arg.depth if _dec_arg else depth
        if __debug__:
            logger.debug("\t\t - It is a COLLECTION: " + str(col_f_name))
            logger.debug("\t\t\t - Depth: " + str(_col_dep))

        # Check if this collection is in layout
        # Three conditions:
        # 1- this is a mpi task
        # 2- it has a collection layout
        # 3- the current argument is the layout target
        in_mpi_collection_env = False
        if python_mpi and collections_layouts and \
                collections_layouts[0] == argument.name:
            in_mpi_collection_env = True
            from pycompss.util.mpi.helper import rank_distributor
            # Call rank_distributor if the current param is the target of
            # the layout for each rank, return its offset(s) in the
            # collection.
            rank_distribution = rank_distributor(collections_layouts[1:])
            rank_distr_len = len(rank_distribution)
            if __debug__:
                logger.debug("Rank distribution is: " + str(rank_distribution))  # noqa: E501

        for (i, line) in enumerate(open(col_f_name, 'r')):
            if in_mpi_collection_env:
                # Isn't this my offset? skip
                if i not in rank_distribution:
                    continue
            data_type, content_file, content_type = line.strip().split()
            # Same naming convention as in COMPSsRuntimeImpl.java
            sub_name = "%s.%d" % (argument.name, i)
            if name_prefix:
                sub_name = "%s.%s" % (name_prefix, argument.name)
            else:
                sub_name = "@%s" % sub_name

            if __debug__:
                logger.debug("\t\t\t - Revealing element: " + str(sub_name))

            if not self.is_parameter_file_collection(argument.name):
                sub_arg, _ = build_task_parameter(int(data_type),
                                                  parameter.IOSTREAM.UNSPECIFIED,  # noqa: E501
                                                  "",
                                                  sub_name,
                                                  content_file,
                                                  argument.content_type)

                # if direction of the collection is 'out', it means we
                # haven't received serialized objects from the Master
                # (even though parameters have 'file_name', those files
                # haven't been created yet). plus, inner collections of
                # col_out params do NOT have 'direction', we identify
                # them by 'depth'..
                if _col_dir == parameter.DIRECTION.OUT or \
                        ((_col_dir is None) and _col_dep > 0):
                    # if we are at the last level of COL_OUT param,
                    # create 'empty' instances of elements
                    if _col_dep == 1:
                        temp = create_object_by_con_type(content_type)
                        sub_arg.content = temp
                        # In case that only one element is used in this
                        # mpi rank, the collection list is removed
                        if in_mpi_collection_env and rank_distr_len == 1:
                            argument.content = sub_arg.content
                            argument.content_type = sub_arg.content_type
                        else:
                            argument.content.append(sub_arg.content)
                        argument.collection_content.append(sub_arg)
                    else:
                        self.retrieve_content(sub_arg, sub_name,
                                              python_mpi,
                                              collections_layouts,
                                              depth=_col_dep - 1)
                        # In case that only one element is used in this mpi
                        # rank, the collection list is removed
                        if in_mpi_collection_env and rank_distr_len == 1:
                            argument.content = sub_arg.content
                            argument.content_type = sub_arg.content_type
                        else:
                            argument.content.append(sub_arg.content)
                        argument.collection_content.append(sub_arg)
                else:
                    # Recursively call the retrieve method, fill the
                    # content field in our new taskParameter object
                    self.retrieve_content(sub_arg, sub_name,
                                          python_mpi, collections_layouts)
                    # In case only one element is used in this mpi rank,
                    # the collection list is removed
                    if in_mpi_collection_env and rank_distr_len == 1:
                        argument.content = sub_arg.content
                        argument.content_type = sub_arg.content_type
                    else:
                        argument.content.append(sub_arg.content)
                    argument.collection_content.append(sub_arg)
            else:
                # In case only one element is used in this mpi rank,
                # the collection list is removed
                if in_mpi_collection_env and rank_distr_len == 1:
                    argument.content = content_file
                    argument.content_type = parameter.TYPE.FILE
                else:
                    argument.content.append(content_file)
                argument.collection_content.append(content_file)
    elif not self.storage_supports_pipelining() and \
            content_type == type_external_psco:
        if __debug__:
            logger.debug("\t\t - It is a PSCO")
        # The object is a PSCO and the storage does not support
        # pipelining, do a single getByID of the PSCO
        from storage.api import getByID  # noqa
        argument.content = getByID(argument.content)