from cloudburst.server.executor import utils import cloudburst.server.utils as sutils from cloudburst.shared.proto.cloudburst_pb2 import ( Continuation, DagTrigger, FunctionCall, NORMAL, MULTI, # Cloudburst's consistency modes, EXECUTION_ERROR, FUNC_NOT_FOUND, # Cloudburst's error types MULTIEXEC # Cloudburst's execution types ) from cloudburst.shared.reference import CloudburstReference from cloudburst.shared.serializer import Serializer serializer = Serializer() def exec_function(exec_socket, kvs, user_library, cache, function_cache): call = FunctionCall() call.ParseFromString(exec_socket.recv()) fargs = [serializer.load(arg) for arg in call.arguments.values] if call.name in function_cache: f = function_cache[call.name] else: f = utils.retrieve_function(call.name, kvs, user_library, call.consistency) if not f:
class TestSerializer(unittest.TestCase):
    '''
    Exercises the Serializer's public dump/load round-trip for plain Python
    objects, numpy arrays, serialized protobuf bytes, and CloudburstFuture
    conversion to CloudburstReference.
    '''

    def setUp(self):
        # Fresh serializer per test so state cannot leak between cases.
        self.serializer = Serializer()

    def test_serialize_obj(self):
        '''
        A plain Python object round-trips through a DEFAULT-typed Value.
        '''
        payload = {'a set'}
        wrapped = self.serializer.dump(payload, serialize=False)

        self.assertIs(type(wrapped), Value)
        self.assertEqual(DEFAULT, wrapped.type)
        self.assertEqual(payload, self.serializer.load(wrapped))

    def test_serialize_numpy(self):
        '''
        A numpy array is tagged NUMPY and round-trips element-for-element.
        '''
        matrix = np.random.randn(100, 100)
        wrapped = self.serializer.dump(matrix, serialize=False)

        self.assertIs(type(wrapped), Value)
        self.assertEqual(NUMPY, wrapped.type)

        restored = self.serializer.load(wrapped)
        self.assertTrue(np.array_equal(matrix, restored))

    def test_serialize_to_bytes(self):
        '''
        With serialize=True the dump returns protobuf bytes that parse back
        into a DEFAULT-typed Value and still load to the original object.
        '''
        payload = {'a set'}
        target = Value()
        raw = self.serializer.dump(payload, target, True)

        self.assertIs(type(raw), bytes)

        target.ParseFromString(raw)
        self.assertEqual(DEFAULT, target.type)
        self.assertEqual(payload, self.serializer.load(raw))

    def test_serialize_future(self):
        '''
        Dumping a CloudburstFuture yields a Value that loads back as a
        CloudburstReference pointing at the future's object id.
        '''
        client = MockAnnaClient()
        pending = CloudburstFuture('id', client, self.serializer)

        wrapped = self.serializer.dump(pending, serialize=False)
        self.assertIs(type(wrapped), Value)
        self.assertEqual(DEFAULT, wrapped.type)

        ref = self.serializer.load(wrapped)
        self.assertIs(type(ref), CloudburstReference)
        self.assertEqual(ref.key, pending.obj_id)
def setUp(self):
    '''
    Give each test case its own Serializer instance so no serializer
    state is shared across tests.
    '''
    fresh_serializer = Serializer()
    self.serializer = fresh_serializer
def run(cloudburst: CloudburstConnection, num_requests: int, data_size: str,
        breakpoint: bool, do_optimize: bool):
    '''
    End-to-end locality benchmark: seeds NUM_DATA_POINTS random arrays into
    the KVS, deploys a two-stage Flow (key selection -> dynamic lookup ->
    sum), then measures per-request latency over `num_requests` runs.

    cloudburst   -- live CloudburstConnection used for puts and deployment.
    num_requests -- number of timed flow executions.
    data_size    -- key into DATA_SIZES choosing the array length.
    breakpoint   -- when True, runs a warmup pass and pauses so cache
                    metadata can propagate before timing. NOTE: this
                    parameter name shadows the `breakpoint` builtin; kept
                    for caller compatibility.
    do_optimize  -- when True, runs the optimizer over the flow first.

    Side effects: writes 'data-1'..'data-N' objects to the KVS, deploys the
    flow, writes serialized latencies to 'data.bts', and prints stats.
    '''
    print('Creating data...')
    size = DATA_SIZES[data_size]
    for i in range(1, NUM_DATA_POINTS + 1):
        arr = np.random.rand(size)
        cloudburst.put_object('data-' + str(i), arr)

    def stage1(self, row: Row) -> (int, str):
        # Map a request number onto one of the seeded data keys
        # (10 consecutive requests share a key).
        idx = int(row['req_num'] / 10) + 1
        key = 'data-%d' % (idx)
        return idx, key

    # Fixed: annotation said `-> str` but the function returns a float
    # (float(np.sum(arr))); the wrong annotation misleads the flow's
    # declared output schema.
    def stage2(self, row: Row) -> float:
        import numpy as np
        # The looked-up array is stored under the column named by row['key'].
        arr = row[row['key']]
        return float(np.sum(arr))

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')
    flow = Flow('locality-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, names=['index', 'key']) \
        .lookup('key', dynamic=True) \
        .map(stage2, names=['sum'])

    optimize_rules['breakpoint'] = breakpoint
    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []
    # (Removed a dead `inp = Table(...)` here: every use below rebuilds
    # its own Table before inserting.)

    if breakpoint:
        print('Starting warmup...')
        for i in range(NUM_DATA_POINTS):
            inp = Table([('req_num', IntType)])
            inp.insert([i * 10])
            res = flow.run(inp).get()

        print('Pausing to let cache metadata propagate...')
        time.sleep(15)

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        inp = Table([('req_num', IntType)])
        inp.insert([i])

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()
        latencies.append(end - start)

    with open('data.bts', 'wb') as f:
        from cloudburst.shared.serializer import Serializer
        ser = Serializer()
        bts = ser.dump(latencies)
        f.write(bts)

    print_latency_stats(latencies, 'E2E')