Example #1
import datetime
import concurrent.futures


# Cache is assumed to be imported from the package under test
def test_threading():
    cache = Cache({"maxCacheSize": 4})
    # fill cache
    startTime = datetime.datetime.now()
    for i in range(10000):
        values = (str(i), {"bytes": bytearray(500)}, 500)
        cache._cache[values[0]] = {
            'timestamp': startTime + datetime.timedelta(seconds=1),
            'size': values[2],
            'value': values[1],
            'header': None
        }
    request = []
    for i in range(1000):
        request.append(('key' + str(i), {"bytes": bytearray(50000)}, 50000))
    res = []

    def process(item):
        cache.update(item[0], item[1])
        return item[0]

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        for out in executor.map(process, request):
            res.append(out)
    assert len(res) == 1000
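The test drives Cache.update from ten threads at once against a pre-filled cache (written directly into the internal _cache dict); the closing assertion checks only that all 1000 concurrent updates complete without deadlock or error.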
Example #2
class DataServer:
    def __init__(self, config):
        self._cache = Cache(config)
        self._host = config['host']
        self._port = config['port']
        self._encodingType = config['encoding']
        self._encoding = Encoding(self._encodingType)

        self._adapter = ZMQServers(self._port, self._createReply, config)
        self.notAvailable = self._encoding.encode(
            self._createError('notAvailable', 'taskId notAvailable'))

    def listen(self):
        log.info('discovery serving on {host}:{port} with {encoding} encoding',
                 host=self._host,
                 port=self._port,
                 encoding=self._encodingType)
        self._adapter.listen()

    @timing
    def _createReply(self, message):
        try:
            decoded = self._encoding.decode(value=message, plainEncode=True)
            tasks = decoded.get('tasks')
            resultsAsTuple = self.getDataByTaskId(tasks)

        except Exception as e:
            result = self._createError('unknown', str(e))
            header, encoded = self._encoding.encode(result)
            return [header, encoded]
        parts = []
        for header, content in resultsAsTuple:
            parts.append(header)
            parts.append(content)
        return parts

    def getDataByTaskId(self, tasks):
        results = []
        for task in tasks:
            if (task not in self._cache):
                result = self.notAvailable
            else:
                result = self._cache.getWithHeader(task)
            results.append(result)
        return results

    def setSendingState(self, taskId, header, encoded, size):
        return self._cache.update(taskId, encoded, size=size, header=header)

    def _createError(self, code, message):
        return {'hkube_error': {'code': code, 'message': message}}

    def isLocal(self, host, port):
        return host == self._host and port == self._port

    def isServing(self):
        return self._adapter.isServing()

    def shutDown(self):
        self._adapter.close()
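A minimal wiring sketch (not from the source): the config keys mirror what DataServer.__init__ reads above; the host, port, and encoding values are illustrative assumptions.

config = {
    'host': 'localhost',    # assumed
    'port': 3003,           # assumed
    'encoding': 'msgpack',  # assumed encoding name
    'maxCacheSize': 4,      # consumed by the internal Cache
}
server = DataServer(config)
server.listen()  # serves via the ZMQ adapter until shutDown() is called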
Example #3
def test_too_large_message():
    cache = Cache({"maxCacheSize": 5})
    value1 = {"bytes": bytearray(1000000)}
    value2 = {"bytes": bytearray(5000000)}
    result = cache.update("task1", value1, 1000000)
    assert result == True
    assert len(cache._cache) == 1
    result = cache.update("task2", value2, 6000000)
    assert result == False
    assert len(cache._cache) == 1
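update declares each entry's size explicitly; 6,000,000 bytes can never fit under a maxCacheSize of 5 (evidently megabytes), so the second call returns False and leaves the cache untouched.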
Example #4
    def __init__(self, config):
        self._cache = Cache(config)
        self._host = config['host']
        self._port = config['port']
        self._encodingType = config['encoding']
        self._encoding = Encoding(self._encodingType)

        self._adapter = ZMQServers(self._port, self._createReply, config)
        self.notAvailable = self._encoding.encode(
            self._createError('notAvailable', 'taskId notAvailable'))
Example #5
    def __init__(self, options, dataServer=None):
        self._dataServer = dataServer
        self._storageCache = Cache(options.storage)
        self._encoding = Encoding(options.storage['encoding'])
        self._storageManager = StorageManager(options.storage)
        self._requestEncoding = options.storage['encoding']
        self._requestTimeout = options.discovery['timeout']
        self._networkTimeout = options.discovery['networkTimeout']
        self._maxWorkers = min(32, (multiprocessing.cpu_count() or 1) + 4)
        log.info('using {workers} workers for DataAdapter', workers=self._maxWorkers)
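The worker count matches the default max_workers heuristic of Python's own ThreadPoolExecutor: min(32, os.cpu_count() + 4).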
Example #6
def test_reaching_limit():
    cache = Cache({"maxCacheSize": 4})
    value1 = {"bytes": bytearray(1000000)}
    cache.update("task1", value1, 1000000)
    assert len(cache._cache) == 1
    cache.update("task2", value1, 1000000)
    cache.update("task3", value1, 1000000)
    assert len(cache._cache) == 3
    cache.update("task4", value1, 1500000)
    assert len(cache._cache) == 3
    assert "task4" in cache
    assert "task1" not in cache
Example #7
def test_get_all():
    cache = Cache({"maxCacheSize": 5})
    value1 = {"data": "1"}
    value2 = {"data": "2"}
    result = cache.update("task1", value1)
    assert result == True
    assert len(cache._cache) == 1
    cache.update("task2", value2)
    assert len(cache._cache) == 2
    tasksNotInCache, valuesInCache = cache.getAll(
        ['task1', 'task2', 'task3', 'task4'])
    assert len(tasksNotInCache) == 2
    assert "task3" in set(tasksNotInCache)
    assert "task4" in set(tasksNotInCache)
    assert len(valuesInCache) == 2
    data = list(map(lambda value: value.get('data'), valuesInCache))
    assert "1" in set(data)
    assert "2" in set(data)
Example #8
class DataAdapter:
    def __init__(self, options, dataServer=None):
        self._dataServer = dataServer
        self._storageCache = Cache(options.storage)
        self._encoding = Encoding(options.storage['encoding'])
        self._storageManager = StorageManager(options.storage)
        self._requestEncoding = options.storage['encoding']
        self._requestTimeout = options.discovery['timeout']
        self._networkTimeout = options.discovery['networkTimeout']
        self._maxWorkers = min(32, (multiprocessing.cpu_count() or 1) + 4)
        log.info('using {workers} workers for DataAdapter', workers=self._maxWorkers)

    def encode(self, value):
        return self._encoding.encode(value)

    def decode(self, header=None, value=None):
        return self._encoding.decode(header=header, value=value)

    @trace()
    def getData(self, options):
        jobId = options.get('jobId')
        inputArgs = options.get('input')
        flatInput = options.get('flatInput')
        storage = options.get('storage')

        if (not flatInput):
            return inputArgs

        for k, v in flatInput.items():
            if self._isStorage(v):
                key = v[2:]  # strip the '$$' prefix (see setAlgorithmStorage below)
                link = storage.get(key, None)
                if (link is None):
                    raise Exception('unable to find storage key')

                if (typeCheck.isList(link)):
                    data = self.batchRequest(link, jobId)
                else:
                    data = self.tryGetDataFromPeerOrStorage(link)

                setPath(inputArgs, k, data)

        return inputArgs

    def _isStorage(self, value):
        return typeCheck.isString(value) and value.startswith('$$')

    def setAlgorithmStorage(self, jobId, input):
        storage = {}
        mappedInput = []
        for item in input:
            taskId = uid(8)
            (header, data) = self.encode(item)
            storageInfo = self.setData({'jobId': jobId, 'taskId': taskId, 'header': header, 'data': data})
            storage[taskId] = {'storageInfo': storageInfo}
            mappedInput.append('$${taskId}'.format(taskId=taskId))
        return (storage, mappedInput)
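Note the round trip: setAlgorithmStorage stores each input item and emits a '$$<taskId>' placeholder, which _isStorage and getData above recognize by the '$$' prefix and resolve back into data.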

    @trace()
    def setData(self, options):
        jobId = options.get('jobId')
        taskId = options.get('taskId')
        header = options.get('header')
        data = options.get('data')
        result = self._storageManager.hkube.put(jobId, taskId, header=header, value=data)
        return result

    @timing
    def batchRequest(self, options, jobId):
        batchResponse = []
        for d in options:
            d.update({"jobId": jobId})

        with concurrent.futures.ThreadPoolExecutor(max_workers=self._maxWorkers) as executor:
            for out in executor.map(self._batchRequest, options):
                batchResponse += out

        return batchResponse
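batchRequest fans the per-task fetches out across the thread pool; each _batchRequest below answers what it can from the cache, asks the peer for the rest, and falls back to storage for any task the peer returns an error for.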

    def _batchRequest(self, options):
        batchResponse = []
        jobId = options.get('jobId')
        tasks = options.get('tasks')
        dataPath = options.get('path')
        storageInfo = options.get('storageInfo')
        if (storageInfo):
            storageResult = self._getFromCacheOrStorage(storageInfo, dataPath, storageInfo.get("path"))
            batchResponse.append(storageResult)
            return batchResponse
        tasksNotInCache, batchResponse = self._storageCache.getAll(tasks)
        if (tasksNotInCache):
            options['tasks'] = tasksNotInCache
            results = self._getFromPeer(options)
            for i, item in enumerate(results):
                size, content = item
                peerError = self._getPeerError(content)
                taskId = tasksNotInCache[i]
                if (peerError):
                    storageData = self._getDataForTask(jobId, taskId, dataPath)
                    batchResponse.append(storageData)
                else:
                    self._storageCache.update(taskId, content, size)
                    content = self._getPath(content, dataPath)
                    batchResponse.append(content)

        return batchResponse

    def _getDataForTask(self, jobId, taskId, dataPath):
        path = self._storageManager.hkube.createPath(jobId, taskId)
        return self._getFromCacheOrStorage({'path': path}, dataPath, taskId)

    def tryGetDataFromPeerOrStorage(self, options):
        dataPath = options.get('path')
        storageInfo = options.get('storageInfo')
        discovery = options.get('discovery')
        data = None
        hasResponse = False
        if (discovery):
            cacheId = options.get('taskId')
        else:
            cacheId = storageInfo.get('path')
        data = self._getFromCache(cacheId, dataPath)
        if (not data):
            if (discovery):
                size, data = self._getFromPeer(options)[0]
                peerError = self._getPeerError(data)
                hasResponse = not peerError
                data = None if peerError else data
                if (hasResponse):
                    self._setToCache(cacheId, data, size)
                    data = self._getPath(data, dataPath)
            if (not hasResponse and storageInfo):
                data = self._getFromCacheOrStorage(storageInfo, dataPath, cacheId)

        return data
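Resolution order: local cache first, then the peer named in discovery, then storage; a peer error (an 'hkube_error' payload) is treated as a miss rather than a hard failure.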

    @trace(name='getFromPeer')
    @timing
    def _getFromPeer(self, options):
        taskId = options.get('taskId')
        tasks = [taskId] if taskId else options.get('tasks')
        discovery = options.get('discovery')
        port = discovery.get('port')
        host = discovery.get('host')

        if (self._dataServer and self._dataServer.isLocal(host, port)):
            dataList = self._dataServer.getDataByTaskId(tasks)
            responses = []
            for header, payload in dataList:
                responses.append((len(payload), self.decode(header=header, value=payload)))
        else:
            request = {
                'address': {
                    'port': port,
                    'host': host
                },
                'tasks': tasks,
                'encoding': self._requestEncoding,
                'timeout': self._requestTimeout,
                'networkTimeout': self._networkTimeout
            }
            dataRequest = DataRequest(request)
            responses = dataRequest.invoke()
        return responses
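When the requested peer is this process's own DataServer (isLocal), the fetch short-circuits to an in-process getDataByTaskId call instead of a ZMQ round trip via DataRequest.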

    def _getPeerError(self, options):
        error = None
        if (typeCheck.isDict(options)):
            error = options.get('hkube_error')

        return error

    def _getFromCacheOrStorage(self, options, dataPath, cacheId):
        data = self._getFromCache(cacheId, dataPath)
        if (data is None):
            size, data = self._getFromStorage(options)
            self._setToCache(cacheId, data, size)
            data = self._getPath(data, dataPath)

        return data

    @trace(name='getFromCache')
    @timing
    def _getFromCache(self, cacheId, dataPath):
        data = self._storageCache.get(cacheId)
        data = self._getPath(data, dataPath)
        return data

    def _setToCache(self, cacheId, data, size):
        self._storageCache.update(cacheId, data, size)

    @trace(name='getFromStorage')
    @timing
    def _getFromStorage(self, options):
        (header, payload) = self._storageManager.storage.get(options)
        decoded = self.decode(header=header, value=payload)
        size = len(payload)
        return (size, decoded)

    def createStorageInfo(self, options):
        jobId = options.get('jobId')
        taskId = options.get('taskId')
        encodedData = options.get('encodedData')

        path = self._storageManager.hkube.createPath(jobId, taskId)
        metadata = self.createMetadata(options)

        storageInfo = {
            'storageInfo': {
                'path': path,
                'size': len(encodedData) if encodedData else 0
            },
            'metadata': metadata
        }
        return storageInfo

    def createMetadata(self, options):
        nodeName = options.get('nodeName')
        data = options.get('data')
        savePaths = options.get('savePaths', [])

        metadata = dict()
        objData = dict()
        objData[nodeName] = data
        for path in savePaths:
            try:
                value = getPath(objData, path)
                if (value != 'DEFAULT'):
                    meta = self._getMetadata(value)
                    metadata[path] = meta
            except Exception:
                pass

        return metadata

    def _getMetadata(self, value):
        if (typeCheck.isDict(value)):
            meta = {'type': 'object'}
        elif (typeCheck.isList(value)):
            meta = {'type': 'array', 'size': len(value)}
        else:
            meta = {'type': str(type(value).__name__)}
        return meta

    def _getPath(self, data, dataPath):
        if (data and dataPath):
            newData = getPath(data, dataPath)
            # getPath signals a missing path with the sentinel 'DEFAULT'
            if (newData == 'DEFAULT'):
                newData = None
        else:
            newData = data
        return newData
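For callers, a sketch of the options shape tryGetDataFromPeerOrStorage expects, reconstructed from the .get() calls above; every value here is an illustrative assumption.

options = {
    'taskId': 'task-1',                                # cache key when discovery is present
    'path': 'result.items',                            # dataPath drilled into the decoded payload
    'discovery': {'host': 'localhost', 'port': 3003},  # peer to query first
    'storageInfo': {'path': 'jobs/job-1/task-1'},      # storage fallback (also the cache key without discovery)
}
# data = adapter.tryGetDataFromPeerOrStorage(options)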