def run(cloudburst: CloudburstConnection, num_requests: int, batch_size: int,
        gpu: bool):
    with open('imagenet_classes.txt', 'r') as f:
        classes = [line.strip() for line in f.readlines()]

    cloudburst.put_object('imagenet-classes', classes)

    def resnet_init_gpu(self, cloudburst):
        import os

        import torch
        import torchvision
        from torchvision import transforms

        tpath = os.path.join(os.getenv('TORCH_HOME'), 'checkpoints')
        self.resnet = torch.load(os.path.join(tpath, 'resnet101.model')).cuda()
        self.resnet.eval()

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        self.classes = cloudburst.get('imagenet-classes')

    def resnet_model_gpu(self, table: Table) -> str:
        """
        ResNet-101 for image classification on ImageNet.
        """
        import torch

        inputs = []
        for row in table.get():
            img = self.transforms(row['img'])
            inputs.append(img)

        inputs = torch.stack(inputs, dim=0).cuda()
        output = self.resnet(inputs)
        _, indices = torch.sort(output, descending=True)
        indices = indices.cpu().detach().numpy()

        # Pick the top-1 class for each image in the batch.
        result = []
        for idx_set in indices:
            index = idx_set[0]
            result.append(self.classes[index])

        return result

    def resnet_init_cpu(self, cloudburst):
        import os

        import torch
        import torchvision
        from torchvision import transforms

        tpath = os.path.join(os.getenv('TORCH_HOME'), 'checkpoints')
        self.resnet = torch.load(os.path.join(tpath, 'resnet101.model'))
        self.resnet.eval()

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        self.classes = cloudburst.get('imagenet-classes')

    def resnet_model_cpu(self, table: Table) -> str:
        """
        ResNet-101 for image classification on ImageNet.
        """
        import torch

        inputs = []
        for row in table.get():
            img = self.transforms(row['img'])
            inputs.append(img)

        inputs = torch.stack(inputs, dim=0)
        output = self.resnet(inputs)
        _, indices = torch.sort(output, descending=True)
        indices = indices.detach().numpy()

        # Pick the top-1 class for each image in the batch.
        result = []
        for idx_set in indices:
            index = idx_set[0]
            result.append(self.classes[index])

        return result

    print(f'Creating flow with batch size {batch_size}.')
    flow = Flow('batching-benchmark', FlowType.PUSH, cloudburst)
    if gpu:
        flow.map(resnet_model_gpu,
                 init=resnet_init_gpu,
                 names=['class'],
                 gpu=True,
                 batching=True)
    else:
        flow.map(resnet_model_cpu,
                 init=resnet_init_cpu,
                 names=['class'],
                 batching=True)

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    inp = Table([('img', NumpyType)])
    img = np.array(Image.open('panda.jpg').convert('RGB').resize((224, 224)))
    inp.insert([img])

    kvs = cloudburst.kvs_client

    if gpu:
        print('Starting GPU warmup...')
        for _ in range(50):
            flow.run(inp).get()

        print('Finished warmup...')

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        futs = []
        for _ in range(batch_size):
            futs.append(flow.run(inp))
        pending = set([fut.obj_id for fut in futs])

        # Poll the KVS directly instead of blocking on each future, so the get
        # requests for the whole batch of outstanding results are issued
        # together.
        start = time.time()
        while len(pending) > 0:
            response = kvs.get(list(pending))
            for key in response:
                if response[key] is not None:
                    pending.discard(key)
        end = time.time()
        latencies.append(end - start)

    compute_time = np.mean(latencies) * num_requests
    tput = (batch_size * num_requests) / compute_time
    print('THROUGHPUT: %.2f' % tput)
    print_latency_stats(latencies, 'E2E')
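
# A minimal driver sketch (an assumption, not part of the benchmark source):
# run() is presumably invoked by connecting to a running Cloudburst cluster
# and passing the connection in. The addresses and argument values below are
# placeholders, not values from the source.
if __name__ == '__main__':
    from cloudburst.client.client import CloudburstConnection

    cloudburst = CloudburstConnection('<function-elb-address>', '<client-ip>')
    run(cloudburst, num_requests=1000, batch_size=10, gpu=True)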
    if gpu:
        resnet = resnet_model_gpu
        resnet_cons = resnet_init_gpu
        incept = inceptionv3_model_gpu
        incept_cons = inceptionv3_init_gpu
        trans = transform_batch
    else:
        resnet = resnet_model
        resnet_cons = resnet_init
        incept = inceptionv3_model
        incept_cons = inceptionv3_init
        trans = transform

    with open('imagenet_classes.txt', 'r') as f:
        classes = [line.strip() for line in f.readlines()]

    cloudburst.put_object('imagenet-classes', classes)

    flow = Flow('cascade-flow', FlowType.PUSH, cloudburst)
    rnet = flow.map(trans, init=transform_init, names=['img'], batching=gpu) \
               .map(resnet,
                    init=resnet_cons,
                    names=['img', 'resnet_index', 'resnet_max_prob'],
                    gpu=gpu,
                    batching=gpu)
    incept = rnet.filter(low_prob) \
                 .map(incept,
                      init=incept_cons,
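
# The fragment above filters on low_prob, which it does not define. A minimal
# sketch of such a cascade predicate, under the assumption that it gates on
# the resnet_max_prob column produced by the ResNet stage; the 0.85 threshold
# is hypothetical, not from the source. Only rows where ResNet's top-1
# probability is low get forwarded to the more expensive Inception-v3 stage.
def low_prob(self, row: Row) -> bool:
    # Hypothetical confidence cutoff; the real value is not in the fragment.
    return row['resnet_max_prob'] < 0.85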
def run(cloudburst: CloudburstConnection, num_requests: int, data_size: str,
        breakpoint: bool, do_optimize: bool):
    print('Creating data...')
    size = DATA_SIZES[data_size]
    for i in range(1, NUM_DATA_POINTS + 1):
        arr = np.random.rand(size)
        cloudburst.put_object('data-' + str(i), arr)

    def stage1(self, row: Row) -> (int, str):
        # Map each block of 10 requests onto one of the stored data objects.
        idx = int(row['req_num'] / 10) + 1
        key = 'data-%d' % idx
        return idx, key

    def stage2(self, row: Row) -> float:
        import numpy as np
        arr = row[row['key']]
        return float(np.sum(arr))

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')
    flow = Flow('locality-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, names=['index', 'key']) \
        .lookup('key', dynamic=True) \
        .map(stage2, names=['sum'])

    optimize_rules['breakpoint'] = breakpoint
    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    if breakpoint:
        # Touch every data object once so executor caches are warm, then wait
        # for the cache metadata to propagate before measuring.
        print('Starting warmup...')
        for i in range(NUM_DATA_POINTS):
            inp = Table([('req_num', IntType)])
            inp.insert([i * 10])
            res = flow.run(inp).get()

        print('Pausing to let cache metadata propagate...')
        time.sleep(15)

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        inp = Table([('req_num', IntType)])
        inp.insert([i])

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    with open('data.bts', 'wb') as f:
        from cloudburst.shared.serializer import Serializer
        ser = Serializer()
        bts = ser.dump(latencies)
        f.write(bts)

    print_latency_stats(latencies, 'E2E')
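
# Reading the recorded latencies back for offline analysis (a sketch assuming
# the Cloudburst Serializer exposes a load() counterpart to the dump() call
# above; the helper name load_latencies is ours, not from the source):
def load_latencies(path: str = 'data.bts'):
    from cloudburst.shared.serializer import Serializer

    with open(path, 'rb') as f:
        return Serializer().load(f.read())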