Code example #1
    def test_split_already_persistent(self):

        bn, bm = (2, 1)
        x = np.arange(100).reshape(10, -1)
        blocks = []
        for i in range(0, x.shape[0], bn):
            row = [x[i: i + bn, j: j + bm] for j in range(0, x.shape[1], bm)]
            blocks.append(row)

        data = StorageNumpy(input_array=x, name="test_split_already_persistent")

        data.sync()  # Flush values to Cassandra
        for i, chunk in enumerate(data.np_split(block_size=(bn, bm))):
            storage_id = chunk.storage_id
            chunk.sync()  # Flush data
            del chunk
            chunk = getByID(storage_id)
            self.assertTrue(np.array_equal(list(chunk), blocks[i]))

        del data
        gc.collect()

        data = StorageNumpy(name="test_split_already_persistent")
        self.assertTrue(np.array_equal(list(data), x))

        for i, chunk in enumerate(data.np_split(block_size=(bn, bm))):
            storage_id = chunk.storage_id
            chunk.sync()  # Flush data
            del chunk
            chunk = getByID(storage_id)
            self.assertTrue(np.array_equal(list(chunk), blocks[i]))

        self.assertEqual(i + 1, len(blocks))
Code example #2
    def reveal_objects(self, args, logger, python_mpi=False, collections_layouts=None):  # noqa
        # type: (tuple, logger, bool, list) -> None
        """ Get the objects from the args message.

        This function takes the arguments passed from the persistent worker
        and treats them to get the proper parameters for the user function.

        :param args: Arguments.
        :param logger: Logger (shadows outer logger since this is only used
                               in the worker to reveal the parameter objects).
        :param python_mpi: If the task is python MPI.
        :param collections_layouts: Layouts of collections params for python MPI tasks.
        :return: None
        """
        if self.storage_supports_pipelining():
            if __debug__:
                logger.debug("The storage supports pipelining.")
            # Perform the pipelined getByID operation
            pscos = [x for x in args if
                     x.content_type == parameter.TYPE.EXTERNAL_PSCO]
            identifiers = [x.content for x in pscos]
            from storage.api import getByID  # noqa
            objects = getByID(*identifiers)
            # Update each Parameter object with its retrieved content
            for (retrieved, param) in zip(objects, pscos):
                param.content = retrieved

        # Deal with all the parameters that are NOT returns
        for arg in [x for x in args if
                    isinstance(x, Parameter) and not is_return(x.name)]:
            self.retrieve_content(arg, "", python_mpi, collections_layouts)
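When the backend supports pipelining, a single getByID call resolves several
identifiers in one round trip (code examples #14 and #20 below rely on the same
behavior). A minimal sketch of the two retrieval paths; the helper name is
illustrative and not part of the COMPSs API:

    def resolve_pscos(identifiers, pipelined):
        # Hypothetical helper mirroring the two paths in reveal_objects.
        from storage.api import getByID
        if pipelined:
            retrieved = getByID(*identifiers)
            if len(identifiers) == 1:
                # A single identifier yields a bare object, not a list
                # (see the Redis handling in code example #20).
                retrieved = [retrieved]
            return list(retrieved)
        # Fallback: one getByID round trip per identifier.
        return [getByID(ident) for ident in identifiers]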
Code example #3
    def test_build_remotely_iterkeys_split_test(self):
        tablename = 'tab30'
        config.session.execute('DROP TABLE IF EXISTS my_app.' + tablename)
        config.session.execute('DROP TABLE IF EXISTS my_app.' + tablename + '_words')
        sto = TestSimple(tablename)
        pd = sto.words
        num_inserts = 1000

        what_should_be = set()
        for i in range(num_inserts):
            pd[i] = 'ciao' + str(i)
            what_should_be.add(i)
        del pd, sto
        count, = config.session.execute('SELECT count(*) FROM my_app.' + tablename + '_words')[0]
        self.assertEqual(count, num_inserts)

        sto = TestSimple(tablename)
        pd = sto.words

        count = 0
        res = set()
        splits = 0
        for partition in pd.split():
            part_id = partition.getID()  # avoid shadowing the id() builtin
            from storage.api import getByID
            rebuild = getByID(part_id)
            splits += 1
            for val in rebuild.iterkeys():
                res.add(val)
                count += 1
        del pd
        self.assertTrue(splits >= config.number_of_partitions)
        self.assertEqual(count, num_inserts)
        self.assertEqual(what_should_be, res)
Code example #4
 def test_parse_index_on(self):
     a = TestStorageIndexedArgsObj()
     self.assertEqual(a.test._indexed_args, ['x', 'y', 'z'])
     a.make_persistent('tparse.t1')
     from storage.api import getByID
     b = getByID(a.getID())
     self.assertEqual(b.test._indexed_args, ['x', 'y', 'z'])
Code example #5
 def test_getByID_block(self):
     # ki = KeyIter('testspace', 'tt', 'app.words.Words', 'fake-id', ['position'])
     SO = Words('so')
     b = next(SO.split())  # .next() is Python 2 only; next() works on both
     new_block = getByID(b.getID())
     self.assertEqual(b.getID(), new_block.getID())
     self.assertEqual(b, new_block)
Code example #6
    def test_remote_build_iterkeys_split_test(self):
        config.session.execute("DROP TABLE IF EXISTS my_app.tab_b0")
        config.session.execute(
            "CREATE TABLE IF NOT EXISTS my_app.tab_b0(position int, value text, PRIMARY KEY(position))"
        )
        tablename = "tab_b0"
        pd = StorageDict(tablename, [('position', 'int')], [('value', 'text')])
        num_inserts = 10000
        what_should_be = set()
        for i in range(num_inserts):
            pd[i] = 'ciao' + str(i)
            what_should_be.add(i)
        del pd
        count, = config.session.execute(
            'SELECT count(*) FROM my_app.tab_b0')[0]
        self.assertEqual(count, num_inserts)

        pd = StorageDict(tablename, [('position', 'int')], [('value', 'text')])

        count = 0
        res = set()
        for partition in pd.split():
            part_id = partition.getID()  # avoid shadowing the id() builtin
            from storage.api import getByID
            rebuild = getByID(part_id)
            for val in rebuild.iterkeys():
                res.add(val)
                count += 1
        self.assertEqual(count, num_inserts)
        self.assertEqual(what_should_be, res)
Code example #7
File: persistent_storage.py Project: mF2C/COMPSsOLD
def get_by_ID(id):
    '''
    Retrieve the actual object from a persistent object identifier.
    :param id: Persistent object identifier
    :return: The object that corresponds to the id
    '''
    return getByID(id)
Code example #8
import redis
import storage.api as stapi  # assumed alias; the snippet uses 'stapi' below


def main():
    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    keys = r.keys()
    print("Number of keys: " + str(len(keys)))
    stapi.redis_connection = r
    for k in keys:
        v = stapi.getByID(k)
        print("KEY: " + str(k) + "  VALUE: " + str(v))
Code example #9
File: persistent.py Project: mF2C/compss
def get_by_id(id):
    """
    Retrieve the actual object from a persistent object identifier.

    :param id: Persistent object identifier
    :return: The object that corresponds to the id
    """
    return getByID(id)
Code example #10
File: persistent.py Project: curiousTauseef/compss
def get_by_id(identifier):
    # type: (str) -> object
    """ Retrieve the actual object from a persistent object identifier.

    :param identifier: Persistent object identifier.
    :return: The object that corresponds to the id.
    """
    return getByID(identifier)
Code example #11
 def test_can_be_rebuild(self):
     it = QbeastIterator([('partid', 'int'), ('time', 'float')],
                         [('x', 'float'), ('y', 'float'),
                          ('z', 'float')], "test.particle",
                         QbeastMeta('', [-.5, -.5, -.5], [3, 3, 3], 0.9))
     from storage.api import getByID
     it2 = getByID(it.getID())
     self.assertEqual(it.getID(), it2.getID())
Code example #12
File: testRedis.py Project: class-euproject/compss
 def testTaskPersister(self):
     from pycompss.api.api import compss_wait_on as sync
     a = PSCO('Persisted in task')
     ID = psco_persister(a)
     ID = sync(ID)
     from storage.api import getByID
     an = getByID(ID)
     self.assertEqual('Persisted in task', an.get_content())
Code example #13
File: storage_api_tests.py Project: bsc-dd/hecuba
    def class_type_test(self):
        base_dict = ApiTestSDict('api_sdict')
        # PyCOMPSs requires uuid of type str
        storage_id = str(base_dict.storage_id)
        del base_dict

        rebuild_dict = getByID(storage_id)
        self.assertTrue(isinstance(rebuild_dict, ApiTestSDict))
        rebuild_dict.delete_persistent()
Code example #14
File: testRedis.py Project: class-euproject/compss
 def testPipeline(self):
     a = PSCO('a')
     b = PSCO('b')
     c = PSCO('c')
     a.make_persistent()
     b.make_persistent()
     c.make_persistent()
     from storage.api import getByID
     an, bn, cn = getByID(a.getID(), b.getID(), c.getID())
     self.assertEqual(a.get_content(), an.get_content())
     self.assertEqual(b.get_content(), bn.get_content())
     self.assertEqual(c.get_content(), cn.get_content())
Code example #15
File: testRedis.py Project: class-euproject/compss
 def testTaskPersister_inout(self):
     from pycompss.api.api import compss_wait_on as sync
     a = PSCO('Persisted in task')
     newId = psco_persister_inout(a)
     b = sync(a)
     newId = sync(newId)
     self.assertEqual(a.getID(), None)
     self.assertNotEqual(b.getID(), None)
     self.assertNotEqual(a.getID(), b.getID())
     self.assertEqual(b.getID(), newId)
     from storage.api import getByID
     bn = getByID(newId)
     # assertEqual's third positional argument is a message, so the original
     # three-argument call only compared the first two values; compare pairwise
     self.assertEqual(a.get_content(), b.get_content())
     self.assertEqual(b.get_content(), bn.get_content())
Code example #16
    def test_nestedso_notpersistent(self):
        config.session.execute("DROP TABLE IF EXISTS my_app.mynewso")
        config.session.execute("DROP TABLE IF EXISTS my_app.myso")

        my_nested_so = Test3StorageObj()

        my_nested_so.myso.name = 'Link'
        self.assertEqual('Link', my_nested_so.myso.name)
        my_nested_so.myso.age = 10
        self.assertEqual(10, my_nested_so.myso.age)

        error = False
        try:
            config.session.execute('SELECT * FROM my_app.myso')
        except cassandra.InvalidRequest:
            error = True
        self.assertTrue(error)

        my_nested_so.myso2.test[0] = 'position0'
        self.assertEqual('position0', my_nested_so.myso2.test[0])

        my_nested_so2 = Test4StorageObj()

        my_nested_so2.myotherso.name = 'Link'
        self.assertEqual('Link', my_nested_so2.myotherso.name)
        my_nested_so2.myotherso.age = 10
        self.assertEqual(10, my_nested_so2.myotherso.age)

        error = False
        try:
            config.session.execute('SELECT * FROM my_app.myso')
        except cassandra.InvalidRequest:
            error = True
        self.assertTrue(error)

        my_nested_so3 = Test4bStorageObj('mynested')
        my_nested_subso = my_nested_so3.myotherso

        my_other_nested = getByID(my_nested_subso.getID())
        my_other_nested.name = 'bla'
        my_other_nested.age = 5
        error = False
        try:
            result = config.session.execute('SELECT * FROM my_app.mynested_myotherso')
        except cassandra.InvalidRequest:
            error = True
        self.assertFalse(error)
        for row in result:
            query_res = row
        self.assertEqual(5, query_res.age)
        self.assertEqual('bla', query_res.name)
Code example #17
    def testCanBeRebuilt(self):
        config.session.execute("DROP TABLE IF EXISTS my_app.indexed_dict")
        config.session.execute(
            "DROP TABLE IF EXISTS my_app_qbeast.indexed_dict_indexed_dict_idx_d8tree"
        )
        d = TestIndexObj("my_app.indexed_dict")
        for i in range(0, 30):
            d[i, i + 1.0] = [i * 0.1 / 9.0, i * 0.2 / 9.0, i * 0.3 / 9.0]

        time.sleep(1)
        filtered = filter(
            lambda row: 0.02 < row.x < 0.25 and 0.26 < row.y < 0.45
                        and 0.58 < row.z < 0.9, d.items())
        from storage.api import getByID
        for partition in filtered.split():
            it2 = getByID(partition.storage_id)
            self.assertEqual(filtered._qbeast_random, it2._qbeast_random)
Code example #18
    def test_split_by_columns(self):
        """
        Tests iterating through the columns of the Hecuba array
        """
        bn, bm = (10, 1)
        x = np.arange(100).reshape(10, -1)
        blocks = []
        for i in range(0, x.shape[0], bn):
            row = [x[i: i + bn, j: j + bm] for j in range(0, x.shape[1], bm)]
            blocks.append(row)

        data = StorageNumpy(input_array=x, name="test_split_by_columns")
        data.sync()  # Flush values to Cassandra
        for i, chunk in enumerate(data.np_split(block_size=(bn, bm))):
            storage_id = chunk.storage_id
            chunk.sync()  # Flush data
            del chunk
            chunk = getByID(storage_id)
            self.assertTrue(np.array_equal(list(chunk), blocks[i]))

        self.assertEqual(i + 1, len(blocks))
Code example #19
    def test_build_remotely_keys_split_test(self):
        tablename = 'tab30'
        sto = TestSimple(tablename)
        pd = sto.words
        tbl_name = pd._table
        num_inserts = 1000

        what_should_be = set()
        for i in range(num_inserts):
            pd[i] = 'ciao' + str(i)
            what_should_be.add(i)
        del pd, sto

        gc.collect()
        count, = config.session.execute('SELECT count(*) FROM ' + self.ksp +
                                        '.{}'.format(tbl_name))[0]
        self.assertEqual(count, num_inserts)

        sto = TestSimple(tablename)
        pd = sto.words

        count = 0
        res = set()
        splits = 0
        for partition in pd.split():
            part_id = partition.storage_id  # avoid shadowing the id() builtin
            from storage.api import getByID
            rebuild = getByID(part_id)
            splits += 1
            for val in rebuild.keys():
                res.add(val)
                count += 1
        pd.delete_persistent()
        del pd
        self.assertTrue(splits >= config.splits_per_node * N_CASS_NODES)
        self.assertEqual(count, num_inserts)
        self.assertEqual(what_should_be, res)
Code example #20
def get_input_params(num_params, logger, args, process_name, persistent_storage):
    pos = 0
    values = []
    types = []
    streams = []
    prefixes = []

    def is_redis():
        try:
            import storage.api
            return storage.api.__name__ == "redispycompss"
        except ImportError:
            # Could not import the storage api
            return False

    if is_redis():
        pre_pipeline = []

    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]

        if __debug__:
            logger.debug("[PYTHON WORKER %s] Parameter : %s" % (process_name, str(i)))
            logger.debug("[PYTHON WORKER %s] \t * Type : %s" % (process_name, str(pType)))
            logger.debug("[PYTHON WORKER %s] \t * Stream : %s" % (process_name, str(pStream)))
            logger.debug("[PYTHON WORKER %s] \t * Prefix : %s" % (process_name, str(pPrefix)))
            logger.debug("[PYTHON WORKER %s] \t * Value: %r" % (process_name, pValue))

        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)

        if pType == TYPE.FILE:
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            if is_redis():
                po = pValue
                pre_pipeline.append((po, len(values)))
            else:
                po = get_by_ID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux.encode())
            if aux.decode() == EMPTY_STRING_KEY:
                # Then it is an empty string
                aux = ""
            else:
                #######
                # Check if the string is really an object
                # Required in order to recover objects passed as parameters.
                # - Option object_conversion
                real_value = aux
                try:
                    # try to recover the real object
                    aux = deserialize_from_string(aux)
                except (SerializerException, ValueError, EOFError):
                    # was not an object
                    aux = str(real_value.decode())
                #######
            values.append(aux)
            if __debug__:
                logger.debug("[PYTHON WORKER %s] \t * Final Value: %s" % (process_name, str(aux)))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            my_l = long(pValue)
            if my_l > JAVA_MAX_INT or my_l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                my_l = int(my_l)
            values.append(my_l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            if pValue == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (pType == TYPE.OBJECT):
        #    pass
        else:
            logger.fatal("[PYTHON WORKER %s] Invalid type (%d) for parameter %d" % (process_name, pType, i))
            exit(1)
        pos += 4
    if is_redis() and pre_pipeline:
        ids = [ident for (ident, _) in pre_pipeline]
        from storage.api import getByID
        retrieved_objects = getByID(*ids)
        if len(ids) == 1:
            # A single identifier returns a bare object, not a list
            retrieved_objects = [retrieved_objects]
        objindex = zip(retrieved_objects, [index for (_, index) in pre_pipeline])
        for (obj, index) in objindex:
            values[index] = obj

    return values, types, streams, prefixes
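The Redis branch above collects (identifier, position) pairs while parsing,
resolves them with one pipelined getByID, and scatters the results back into
values. Stripped of the worker plumbing, the pattern reduces to a minimal
sketch like this (function name is illustrative):

    def batch_resolve(values, psco_slots):
        # psco_slots holds (identifier, index-into-values) pairs gathered
        # while parsing parameters, exactly like pre_pipeline above.
        if not psco_slots:
            return values
        from storage.api import getByID
        ids = [ident for (ident, _) in psco_slots]
        retrieved = getByID(*ids)
        if len(ids) == 1:
            retrieved = [retrieved]  # a single lookup returns a bare object
        for obj, (_, index) in zip(retrieved, psco_slots):
            values[index] = obj
        return values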
Code example #21
def compss_worker(persistent_storage):
    """
    Worker main method (invoked from __main__).
    """
    logger = logging.getLogger('pycompss.worker.worker')

    logger.debug("Starting Worker")

    args = sys.argv[6:]
    path = args[0]
    method_name = args[1]

    numSlaves = int(args[2])
    slaves = []
    for i in range(2, 2 + numSlaves):
        slaves.append(args[i])
    argPosition = 3 + numSlaves

    args = args[argPosition:]
    cus = args[0]

    args = args[1:]
    has_target = args[0]
    return_type = args[1]
    num_params = int(args[2])

    args = args[3:]
    pos = 0

    values = []
    types = []
    streams = []
    prefixes = []

    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, PARAMETER_PROCESSING)

    if persistent_storage:
        from storage.api import getByID
        from storage.api import TaskContext

    # Get all parameter values
    logger.debug("Processing parameters:")
    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]

        logger.debug("Parameter : " + str(i))
        logger.debug("\t * Type : " + str(pType))
        logger.debug("\t * Stream : " + str(pStream))
        logger.debug("\t * Prefix : " + str(pPrefix))
        logger.debug("\t * Value: " + str(pValue))

        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)

        if pType == TYPE.FILE:
            '''
            # check if it is a persistent object
            # TODO: I find that it makes no sense to identify PSCOs this way
            # Why do not we simply check if the object of a subclass of the
            # storage_object?
            if 'getID' in dir(pValue) and pValue.getID() is not None:
                po = getByID(pValue.getID())
                values.append(po)
            else:
                values.append(pValue)
            '''
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            po = getByID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux)
            #######
            # Check if the string is really an object
            # Required in order to recover objects passed as parameters.
            # - Option object_conversion
            real_value = aux
            try:
                # try to recover the real object
                aux = deserialize_from_string(aux.decode('string_escape'))
            except (SerializerException, ValueError, EOFError):
                # was not an object
                aux = real_value
            #######
            values.append(aux)
            logger.debug("\t * Final Value: " + str(aux))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            my_l = long(pValue)
            if my_l > JAVA_MAX_INT or my_l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                my_l = int(my_l)
            values.append(my_l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            if pValue == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (pType == TYPE.OBJECT):
        #    pass
        else:
            logger.fatal("Invalid type (%d) for parameter %d" % (pType, i))
            exit(1)
        pos += 4

    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, LOGGING)

    if logger.isEnabledFor(logging.DEBUG):
        values_str = ''
        types_str = ''
        for v in values:
            values_str += "\t\t" + str(v) + "\n"
        for t in types:
            types_str += str(t) + " "
        logger.debug("RUN TASK with arguments\n" +
                     "\t- Path: " + path + "\n" +
                     "\t- Method/function name: " + method_name + "\n" +
                     "\t- Has target: " + has_target + "\n" +
                     "\t- # parameters: " + str(num_params) + "\n" +
                     "\t- Values:\n" + values_str +
                     "\t- COMPSs types: " + types_str)

    if tracing:
        pyextrae.event(TASK_EVENTS, 0)
        pyextrae.event(TASK_EVENTS, MODULES_IMPORT)

    try:
        # Try to import the module (for functions)
        logger.debug("Trying to import the user module.")
        if sys.version_info >= (2, 7):
            module = importlib.import_module(path)  # Python 2.7
            logger.debug("Module successfully loaded (Python version >= 2.7)")
        else:
            module = __import__(path, globals(), locals(), [path], -1)
            logger.debug("Module successfully loaded (Python version < 2.7")

        if persistent_storage:
            with TaskContext(logger, values, config_file_path=storage_conf):
                getattr(module, method_name)(*values, compss_types=types, compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
        else:
            getattr(module, method_name)(*values, compss_types=types, compss_tracing=tracing)
            if tracing:
                pyextrae.eventandcounters(TASK_EVENTS, 0)
                pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
    # ==========================================================================
    except AttributeError:
        # Appears with functions that have not been well defined.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.exception("WORKER EXCEPTION - Attribute Error Exception")
        logger.exception(''.join(line for line in lines))
        logger.exception("Check that all parameters have been defined with " +
                         "an absolute import path (even if in the same file)")
        exit(1)
    # ==========================================================================
    except ImportError:
        logger.debug("Could not import the module. Reason: Method in class.")
        # Not the path of a module, it ends with a class name
        class_name = path.split('.')[-1]

        if '.' in path:
            module_name = '.'.join(path.split('.')[0:-1])
        else:
            module_name = path
        module = __import__(module_name, fromlist=[class_name])
        klass = getattr(module, class_name)
        logger.debug("Method in class %s of module %s" % (class_name, module_name))

        if has_target == 'true':
            # Instance method
            file_name = values.pop()
            logger.debug("Deserialize self from file.")
            obj = deserialize_from_file(file_name)

            logger.debug("Processing callee, a hidden object of %s in file %s" % (file_name, type(obj)))
            values.insert(0, obj)
            types.pop()
            types.insert(0, TYPE.OBJECT)

            if persistent_storage:
                with TaskContext(logger, values, config_file_path=storage_conf):
                    getattr(klass, method_name)(*values, compss_types=types, compss_tracing=tracing)
                    if tracing:
                        pyextrae.eventandcounters(TASK_EVENTS, 0)
                        pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            else:
                getattr(klass, method_name)(*values, compss_types=types, compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)

            logger.debug("Serializing self to file")
            logger.debug("Obj: " + str(obj))
            serialize_to_file(obj, file_name)
        else:
            # Class method - class is not included in values (e.g. values = [7])
            types.insert(0, None)    # class must be first type

            if persistent_storage:
                with TaskContext(logger, values, config_file_path=storage_conf):
                    getattr(klass, method_name)(*values, compss_types=types, compss_tracing=tracing)
                    if tracing:
                        pyextrae.eventandcounters(TASK_EVENTS, 0)
                        pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
            else:
                getattr(klass, method_name)(*values, compss_types=types, compss_tracing=tracing)
                if tracing:
                    pyextrae.eventandcounters(TASK_EVENTS, 0)
                    pyextrae.eventandcounters(TASK_EVENTS, WORKER_END)
    # ==========================================================================
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.exception("WORKER EXCEPTION")
        logger.exception(''.join(line for line in lines))
        exit(1)
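The ImportError branch above implements a common dispatch fallback: first try
the path as a module that holds the function; if that fails, treat the last
component as a class name inside the remaining module path. Condensed into a
standalone sketch (the helper name is illustrative):

    import importlib

    def load_callable(path, method_name):
        # First assume 'path' names a module that holds the function.
        try:
            module = importlib.import_module(path)
        except ImportError:
            # Otherwise the last component is a class inside a module.
            module_name, _, class_name = path.rpartition('.')
            module = importlib.import_module(module_name or path)
            return getattr(getattr(module, class_name), method_name)
        return getattr(module, method_name)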
Code example #22
def get_input_params(num_params, logger, args, process_name,
                     persistent_storage):
    if persistent_storage:
        from storage.api import getByID
        from storage.api import TaskContext
    pos = 0
    values = []
    types = []
    streams = []
    prefixes = []
    for i in range(0, num_params):
        pType = int(args[pos])
        pStream = int(args[pos + 1])
        pPrefix = args[pos + 2]
        pValue = args[pos + 3]

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("[PYTHON WORKER %s] Parameter : %s" %
                         (process_name, str(i)))
            logger.debug("[PYTHON WORKER %s] \t * Type : %s" %
                         (process_name, str(pType)))
            logger.debug("[PYTHON WORKER %s] \t * Stream : %s" %
                         (process_name, str(pStream)))
            logger.debug("[PYTHON WORKER %s] \t * Prefix : %s" %
                         (process_name, str(pPrefix)))
            logger.debug("[PYTHON WORKER %s] \t * Value: %r" %
                         (process_name, pValue))

        types.append(pType)
        streams.append(pStream)
        prefixes.append(pPrefix)

        if pType == TYPE.FILE:
            values.append(pValue)
        elif pType == TYPE.EXTERNAL_PSCO:
            po = getByID(pValue)
            values.append(po)
            pos += 1  # Skip info about direction (R, W)
        elif pType == TYPE.STRING:
            num_substrings = int(pValue)
            aux = ''
            first_substring = True
            for j in range(4, num_substrings + 4):
                if not first_substring:
                    aux += ' '
                first_substring = False
                aux += args[pos + j]
            # Decode the string received
            aux = base64.b64decode(aux)
            #######
            # Check if the string is really an object
            # Required in order to recover objects passed as parameters.
            # - Option object_conversion
            real_value = aux
            try:
                # try to recover the real object
                aux = deserialize_from_string(aux.decode('string_escape'))
            except (SerializerException, ValueError, EOFError):
                # was not an object
                aux = real_value
            #######
            values.append(aux)
            logger.debug("[PYTHON WORKER %s] \t * Final Value: %s" %
                         (process_name, str(aux)))
            pos += num_substrings
        elif pType == TYPE.INT:
            values.append(int(pValue))
        elif pType == TYPE.LONG:
            my_l = long(pValue)
            if my_l > JAVA_MAX_INT or my_l < JAVA_MIN_INT:
                # A Python int was converted to a Java long to prevent overflow
                # We are sure we will not overflow Python int, otherwise this
                # would have been passed as a serialized object.
                my_l = int(my_l)
            values.append(my_l)
        elif pType == TYPE.DOUBLE:
            values.append(float(pValue))
        elif pType == TYPE.BOOLEAN:
            if pValue == 'true':
                values.append(True)
            else:
                values.append(False)
        # elif (pType == TYPE.OBJECT):
        #    pass
        else:
            logger.fatal(
                "[PYTHON WORKER %s] Invalid type (%d) for parameter %d" %
                (process_name, pType, i))
            exit(1)
        pos += 4
    return values, types, streams, prefixes
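Both workers use the same trick for string parameters: the payload is
base64-decoded and then tentatively deserialized, in case it is really an
object passed as a string (the object_conversion option). As a standalone
sketch, with pickle standing in for the COMPSs serializer:

    import base64
    import pickle

    def decode_string_param(payload):
        # Undo the base64 transport encoding first.
        raw = base64.b64decode(payload.encode())
        try:
            # object_conversion case: the string is a serialized object.
            return pickle.loads(raw)
        except Exception:
            # pickle can raise many error types on non-pickle input;
            # fall back to treating the payload as a plain string.
            return raw.decode()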
Code example #23
 def test_getByID_storage_obj(self):
     b = Words('testspace.tt')
     new_block = getByID(b.getID())
     self.assertEqual(b, new_block)
Code example #24
File: storage_api_tests.py Project: bsc-dd/hecuba
 def test_getByID_storage_obj(self):
     b = Words('tt')
     new_block = getByID(b.storage_id)
     self.assertEqual(b, new_block)
     b.delete_persistent()
Code example #25
    def retrieve_content(self, argument, name_prefix,
                         python_mpi, collections_layouts,
                         depth=0):
        # type: (Parameter, str, bool, list, int) -> None
        """ Retrieve the content of a particular argument.

        :param argument: Argument.
        :param name_prefix: Name prefix.
        :param python_mpi: If the task is python MPI.
        :param collections_layouts: Layouts of collections params for python MPI tasks.
        :param depth: Collection depth (0 if not a collection).
        :return: None
        """
        if __debug__:
            logger.debug("\t - Revealing: " + str(argument.name))
        # This case is special, as a FILE can actually mean a FILE or an
        # object that is serialized in a file
        if is_vararg(argument.name):
            self.param_varargs = argument.name
            if __debug__:
                logger.debug("\t\t - It is vararg")

        content_type = argument.content_type
        type_file = parameter.TYPE.FILE
        type_directory = parameter.TYPE.DIRECTORY
        type_external_stream = parameter.TYPE.EXTERNAL_STREAM
        type_collection = parameter.TYPE.COLLECTION
        type_external_psco = parameter.TYPE.EXTERNAL_PSCO

        if content_type == type_file:
            if self.is_parameter_an_object(argument.name):
                # The object is stored in some file, load and deserialize
                f_name = argument.file_name.split(':')[-1]
                if __debug__:
                    logger.debug("\t\t - It is an OBJECT. Deserializing from file: " + str(f_name))  # noqa: E501
                argument.content = deserialize_from_file(f_name)
                if __debug__:
                    logger.debug("\t\t - Deserialization finished")
            else:
                # The object is a FILE, just forward the path of the file
                # as a string parameter
                argument.content = argument.file_name.split(':')[-1]
                if __debug__:
                    logger.debug("\t\t - It is FILE: " + str(argument.content))
        elif content_type == type_directory:
            if __debug__:
                logger.debug("\t\t - It is a DIRECTORY")
            argument.content = argument.file_name.split(":")[-1]
        elif content_type == type_external_stream:
            if __debug__:
                logger.debug("\t\t - It is an EXTERNAL STREAM")
            argument.content = deserialize_from_file(argument.file_name)
        elif content_type == type_collection:
            argument.content = []
            # This field is exclusive for COLLECTION_T parameters, so make
            # sure you have checked this parameter is a collection before
            # consulting it
            argument.collection_content = []
            col_f_name = argument.file_name.split(':')[-1]

            # Maybe it is an inner collection
            _dec_arg = self.decorator_arguments.get(argument.name, None)
            _col_dir = _dec_arg.direction if _dec_arg else None
            _col_dep = _dec_arg.depth if _dec_arg else depth
            if __debug__:
                logger.debug("\t\t - It is a COLLECTION: " + str(col_f_name))
                logger.debug("\t\t\t - Depth: " + str(_col_dep))

            # Check if this collection is in layout
            # Three conditions:
            # 1- this is a mpi task
            # 2- it has a collection layout
            # 3- the current argument is the layout target
            in_mpi_collection_env = False
            if python_mpi and collections_layouts and \
                    collections_layouts[0] == argument.name:
                in_mpi_collection_env = True
                from pycompss.util.mpi.helper import rank_distributor
                # Call rank_distributor if the current param is the target of
                # the layout for each rank, return its offset(s) in the
                # collection.
                rank_distribution = rank_distributor(collections_layouts[1:])
                rank_distr_len = len(rank_distribution)
                if __debug__:
                    logger.debug("Rank distribution is: " + str(rank_distribution))  # noqa: E501

            for (i, line) in enumerate(open(col_f_name, 'r')):
                if in_mpi_collection_env:
                    # Skip elements that are not assigned to this rank
                    if i not in rank_distribution:
                        continue
                data_type, content_file, content_type = line.strip().split()
                # Same naming convention as in COMPSsRuntimeImpl.java
                sub_name = "%s.%d" % (argument.name, i)
                if name_prefix:
                    sub_name = "%s.%s" % (name_prefix, argument.name)
                else:
                    sub_name = "@%s" % sub_name

                if __debug__:
                    logger.debug("\t\t\t - Revealing element: " +
                                 str(sub_name))

                if not self.is_parameter_file_collection(argument.name):
                    sub_arg, _ = build_task_parameter(int(data_type),
                                                      parameter.IOSTREAM.UNSPECIFIED,  # noqa: E501
                                                      "",
                                                      sub_name,
                                                      content_file,
                                                      argument.content_type)

                    # if direction of the collection is 'out', it means we
                    # haven't received serialized objects from the Master
                    # (even though parameters have 'file_name', those files
                    # haven't been created yet). plus, inner collections of
                    # col_out params do NOT have 'direction', we identify
                    # them by 'depth'..
                    if _col_dir == parameter.DIRECTION.OUT or \
                            ((_col_dir is None) and _col_dep > 0):
                        # if we are at the last level of COL_OUT param,
                        # create 'empty' instances of elements
                        if _col_dep == 1:
                            temp = create_object_by_con_type(content_type)
                            sub_arg.content = temp
                            # In case that only one element is used in this
                            # mpi rank, the collection list is removed
                            if in_mpi_collection_env and rank_distr_len == 1:
                                argument.content = sub_arg.content
                                argument.content_type = sub_arg.content_type
                            else:
                                argument.content.append(sub_arg.content)
                            argument.collection_content.append(sub_arg)
                        else:
                            self.retrieve_content(sub_arg,
                                                  sub_name,
                                                  python_mpi,
                                                  collections_layouts,
                                                  depth=_col_dep - 1)
                            # In case that only one element is used in this mpi
                            # rank, the collection list is removed
                            if in_mpi_collection_env and rank_distr_len == 1:
                                argument.content = sub_arg.content
                                argument.content_type = sub_arg.content_type
                            else:
                                argument.content.append(sub_arg.content)
                            argument.collection_content.append(sub_arg)
                    else:
                        # Recursively call the retrieve method, fill the
                        # content field in our new taskParameter object
                        self.retrieve_content(sub_arg, sub_name,
                                              python_mpi, collections_layouts)
                        # In case only one element is used in this mpi rank,
                        # the collection list is removed
                        if in_mpi_collection_env and rank_distr_len == 1:
                            argument.content = sub_arg.content
                            argument.content_type = sub_arg.content_type
                        else:
                            argument.content.append(sub_arg.content)
                        argument.collection_content.append(sub_arg)
                else:
                    # In case only one element is used in this mpi rank,
                    # the collection list is removed
                    if in_mpi_collection_env and rank_distr_len == 1:
                        argument.content = content_file
                        argument.content_type = parameter.TYPE.FILE
                    else:
                        argument.content.append(content_file)
                    argument.collection_content.append(content_file)

        elif not self.storage_supports_pipelining() and \
                content_type == type_external_psco:
            if __debug__:
                logger.debug("\t\t - It is a PSCO")
            # The object is a PSCO and the storage does not support
            # pipelining, do a single getByID of the PSCO
            from storage.api import getByID  # noqa
            argument.content = getByID(argument.content)