class TestSerializer(unittest.TestCase):
    '''
    Exercises the Serializer interface: round-tripping an ordinary Python
    object, a numpy array, serialized protobuf bytes, and the conversion
    of a CloudburstFuture into a CloudburstReference.
    '''

    def setUp(self):
        self.serializer = Serializer()

    def test_serialize_obj(self):
        '''
        Tests that a normal Python object is serialized correctly.
        '''
        payload = {'a set'}
        result = self.serializer.dump(payload, serialize=False)

        self.assertEqual(type(result), Value)
        self.assertEqual(result.type, DEFAULT)
        self.assertEqual(self.serializer.load(result), payload)

    def test_serialize_numpy(self):
        '''
        Tests that a numpy array is correctly serialized with PyArrow.
        '''
        matrix = np.random.randn(100, 100)
        result = self.serializer.dump(matrix, serialize=False)

        self.assertEqual(type(result), Value)
        self.assertEqual(result.type, NUMPY)

        restored = self.serializer.load(result)
        self.assertTrue(np.array_equal(restored, matrix))

    def test_serialize_to_bytes(self):
        '''
        Tests that the serializer correctly converts to a serialized
        protobuf.
        '''
        payload = {'a set'}
        proto = Value()
        raw = self.serializer.dump(payload, proto, True)

        self.assertEqual(type(raw), bytes)
        proto.ParseFromString(raw)
        self.assertEqual(proto.type, DEFAULT)
        self.assertEqual(self.serializer.load(raw), payload)

    def test_serialize_future(self):
        '''
        Tests that the serializer correctly detects and converts a
        CloudburstFuture to a CloudburstReference.
        '''
        kvs = MockAnnaClient()
        fut = CloudburstFuture('id', kvs, self.serializer)
        result = self.serializer.dump(fut, serialize=False)

        self.assertEqual(type(result), Value)
        self.assertEqual(result.type, DEFAULT)

        ref = self.serializer.load(result)
        self.assertEqual(type(ref), CloudburstReference)
        self.assertEqual(fut.obj_id, ref.key)
def run(cloudburst: CloudburstConnection, num_requests: int, data_size: str,
        breakpoint: bool, do_optimize: bool):
    '''
    Runs the locality benchmark end to end: seeds the KVS with random
    arrays, builds (and optionally optimizes) a two-stage lookup flow,
    then measures and reports per-request end-to-end latency.

    cloudburst: connection used to seed data and back the flow.
    num_requests: number of timed benchmark requests to issue.
    data_size: key into DATA_SIZES selecting the array length to seed.
    breakpoint: forwarded to optimize_rules; also triggers a warmup pass.
        NOTE(review): this parameter shadows the `breakpoint` builtin;
        kept as-is so existing positional/keyword callers are unaffected.
    do_optimize: when True, run the flow through optimize() before deploy.
    '''
    print('Creating data...')
    size = DATA_SIZES[data_size]
    # Seed NUM_DATA_POINTS random arrays under keys data-1 .. data-N.
    for i in range(1, NUM_DATA_POINTS + 1):
        arr = np.random.rand(size)
        cloudburst.put_object('data-' + str(i), arr)

    def stage1(self, row: Row) -> (int, str):
        # Map each request number onto one of the seeded data keys
        # (one key per block of 10 request numbers).
        idx = int(row['req_num'] / 10) + 1
        key = 'data-%d' % (idx)
        return idx, key

    # Fixed return annotation: this was declared `-> str` but plainly
    # returns a float.
    def stage2(self, row: Row) -> float:
        import numpy as np
        arr = row[row['key']]
        return float(np.sum(arr))

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')
    flow = Flow('locality-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, names=['index', 'key']) \
        .lookup('key', dynamic=True) \
        .map(stage2, names=['sum'])

    optimize_rules['breakpoint'] = breakpoint
    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    if breakpoint:
        print('Starting warmup...')
        # Touch every seeded key once so caches are populated before timing.
        for i in range(NUM_DATA_POINTS):
            inp = Table([('req_num', IntType)])
            inp.insert([i * 10])
            flow.run(inp).get()  # block until the request completes

        print('Pausing to let cache metadata propagate...')
        time.sleep(15)

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        inp = Table([('req_num', IntType)])
        inp.insert([i])

        start = time.time()
        flow.run(inp).get()  # timed end-to-end round trip
        end = time.time()

        latencies.append(end - start)

    # Persist raw latency samples for offline analysis.
    with open('data.bts', 'wb') as f:
        from cloudburst.shared.serializer import Serializer
        ser = Serializer()
        f.write(ser.dump(latencies))

    print_latency_stats(latencies, 'E2E')