Example #1
from cloudburst.server.executor import utils
import cloudburst.server.utils as sutils
from cloudburst.shared.proto.cloudburst_pb2 import (
    Continuation,
    DagTrigger,
    FunctionCall,
    NORMAL,
    MULTI,  # Cloudburst's consistency modes
    EXECUTION_ERROR,
    FUNC_NOT_FOUND,  # Cloudburst's error types
    MULTIEXEC  # Cloudburst's execution types
)
from cloudburst.shared.reference import CloudburstReference
from cloudburst.shared.serializer import Serializer

serializer = Serializer()


def exec_function(exec_socket, kvs, user_library, cache, function_cache):
    call = FunctionCall()
    call.ParseFromString(exec_socket.recv())

    fargs = [serializer.load(arg) for arg in call.arguments.values]

    if call.name in function_cache:
        f = function_cache[call.name]
    else:
        f = utils.retrieve_function(call.name, kvs, user_library,
                                    call.consistency)

    if not f:
        # The snippet is truncated here in the original; presumably the
        # executor reports a FUNC_NOT_FOUND error back to the caller.
        ...
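
For context, here is a sketch of the client side that produces the FunctionCall consumed above. This is not from the source: the zmq socket type, the executor address, and the function name are placeholders, and the argument packing is inferred from the loop over call.arguments.values.

# Hypothetical sender for exec_function (not from the source); the socket
# type, address, and function name are placeholders.
import zmq

from cloudburst.shared.proto.cloudburst_pb2 import NORMAL, FunctionCall
from cloudburst.shared.serializer import Serializer

serializer = Serializer()

call = FunctionCall()
call.name = 'my_function'   # hypothetical registered function name
call.consistency = NORMAL

# exec_function runs serializer.load(arg) over call.arguments.values, so
# each argument is packed as an unserialized Value message (assuming the
# field is a repeated Value).
for arg in (1, 'two'):
    serializer.dump(arg, call.arguments.values.add(), serialize=False)

socket = zmq.Context().socket(zmq.PUSH)
socket.connect('tcp://127.0.0.1:5000')  # assumed executor address/port
socket.send(call.SerializeToString())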
Example #2
import unittest

import numpy as np

from cloudburst.shared.future import CloudburstFuture
from cloudburst.shared.proto.cloudburst_pb2 import DEFAULT, NUMPY, Value
from cloudburst.shared.reference import CloudburstReference
from cloudburst.shared.serializer import Serializer

# The original snippet omits its imports; the paths above are inferred from
# Example #1, and MockAnnaClient is assumed to be the Anna KVS test double
# defined alongside these tests.


class TestSerializer(unittest.TestCase):
    '''
    This test suite tests various serializer interface methods to ensure that
    they serialize data correctly and raise errors on unknown types.
    '''
    def setUp(self):
        self.serializer = Serializer()

    def test_serialize_obj(self):
        '''
        Tests that a normal Python object is serialized correctly.
        '''
        obj = {'a set'}

        serialized = self.serializer.dump(obj, serialize=False)

        self.assertEqual(type(serialized), Value)
        self.assertEqual(serialized.type, DEFAULT)

        self.assertEqual(self.serializer.load(serialized), obj)

    def test_serialize_numpy(self):
        '''
        Tests that a numpy array is correctly serialized with PyArrow.
        '''
        obj = np.random.randn(100, 100)

        serialized = self.serializer.dump(obj, serialize=False)

        self.assertEqual(type(serialized), Value)
        self.assertEqual(serialized.type, NUMPY)

        deserialized = self.serializer.load(serialized)
        self.assertTrue(np.array_equal(deserialized, obj))

    def test_serialize_to_bytes(self):
        '''
        Tests that the serializer correctly converts to a serialized protobuf.
        '''
        obj = {'a set'}

        val = Value()
        serialized = self.serializer.dump(obj, val, True)

        self.assertEqual(type(serialized), bytes)
        val.ParseFromString(serialized)
        self.assertEqual(val.type, DEFAULT)

        self.assertEqual(self.serializer.load(serialized), obj)

    def test_serialize_future(self):
        '''
        Tests that the serializer correctly detects and converts a
        CloudburstFuture to a CloudburstReference.
        '''

        kvs_client = MockAnnaClient()
        future = CloudburstFuture('id', kvs_client, self.serializer)

        serialized = self.serializer.dump(future, serialize=False)

        self.assertEqual(type(serialized), Value)
        self.assertEqual(serialized.type, DEFAULT)

        reference = self.serializer.load(serialized)
        self.assertEqual(type(reference), CloudburstReference)
        self.assertEqual(future.obj_id, reference.key)
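
Taken together, the suite pins down a simple round-trip contract, sketched below. The only added assumption is that dump() defaults to returning serialized bytes, which test_serialize_to_bytes (and Example #4, where dump's result is written straight to a file) suggests.

# Round-trip sketch of the Serializer contract exercised above.
import numpy as np

from cloudburst.shared.serializer import Serializer

serializer = Serializer()

# Plain Python objects go through the DEFAULT path...
blob = serializer.dump({'a set'})   # assumed to return bytes by default
assert serializer.load(blob) == {'a set'}

# ...while numpy arrays go through the NUMPY (PyArrow) path.
arr = np.random.randn(10, 10)
assert np.array_equal(serializer.load(serializer.dump(arr)), arr)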
Example #3
def setUp(self):
    self.serializer = Serializer()
Example #4
import time

import numpy as np

from cloudburst.client.client import CloudburstConnection
from cloudburst.shared.serializer import Serializer

# Flow, FlowType, Table, Row, IntType, optimize, optimize_rules,
# DATA_SIZES, NUM_DATA_POINTS, and print_latency_stats come from the
# surrounding benchmark harness; their imports are omitted in the
# original snippet.


def run(cloudburst: CloudburstConnection,
        num_requests: int,
        data_size: str,
        breakpoint: bool,
        do_optimize: bool):

    print('Creating data...')
    size = DATA_SIZES[data_size]
    for i in range(1, NUM_DATA_POINTS+1):
        arr = np.random.rand(size)
        cloudburst.put_object('data-' + str(i), arr)

    def stage1(self, row: Row) -> (int, str):
        idx = int(row['req_num'] / 10) + 1
        key = 'data-%d' % (idx)

        return idx, key

    def stage2(self, row: Row) -> float:
        # numpy is imported inside the function body so the dependency is
        # available when this stage is shipped to a remote executor.
        import numpy as np
        arr = row[row['key']]

        return float(np.sum(arr))

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')

    flow = Flow('locality-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, names=['index', 'key']) \
        .lookup('key', dynamic=True) \
        .map(stage2, names=['sum'])

    optimize_rules['breakpoint'] = breakpoint
    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    if breakpoint:
        print('Starting warmup...')
        for i in range(NUM_DATA_POINTS):
            inp = Table([('req_num', IntType)])
            inp.insert([i * 10])

            res = flow.run(inp).get()

        print('Pausing to let cache metadata propagate...')
        time.sleep(15)

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        inp = Table([('req_num', IntType)])
        inp.insert([i])

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    with open('data.bts', 'wb') as f:
        ser = Serializer()
        bts = ser.dump(latencies)
        f.write(bts)

    print_latency_stats(latencies, 'E2E')
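
A hypothetical driver for the benchmark above; the service address, client IP, and parameter values below are placeholders, not values from the source.

# Hypothetical entry point; CloudburstConnection is assumed to take the
# function service address and the caller's IP (both placeholders here).
if __name__ == '__main__':
    cloudburst = CloudburstConnection('a.cloudburst.example', '127.0.0.1')
    run(cloudburst,
        num_requests=1000,
        data_size='small',   # assumed key in DATA_SIZES
        breakpoint=False,
        do_optimize=True)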