def run(flow, cloudburst, requests, local, sckt=None):
    schema = [('classify', StrType), ('translate', StrType)]

    french = [
        'Je m\'appelle Pierre.',
        'Comment allez-vous aujourd\'hui?',
        'La nuit est longue et froide, et je veux rentrer chez moi.',
        'Tu es venue a minuit, mais je me suis déja couché.',
        'On veut aller dehors mais il faut rester dedans.'
    ]

    german = [
        'Ich bin in Berliner.',
        'Die katz ist saß auf dem Stuhl.',
        'Sie schwimmt im Regen.',
        'Ich gehe in den Supermarkt, aber mir ist kalt.',
        'Ich habe nie gedacht, dass du Amerikanerin bist.'
    ]

    english = [
        'What is the weather like today?',
        'Why does it rain so much in April?',
        'I like running but my ankles hurt.',
        'I should go home to eat dinner before it gets too late.',
        'I would like to hang out with my friends, but I have to work.'
    ]

    # Pre-generate 20 input tables, each pairing a random French or German
    # sentence (to classify and translate) with a random English sentence.
    inputs = []
    for _ in range(20):
        table = Table(schema)
        if random.random() < 0.5:
            other = random.choice(french)
        else:
            other = random.choice(german)

        vals = [other, random.choice(english)]
        table.insert(vals)
        inputs.append(table)

    logging.info('Starting benchmark...')

    latencies = []
    bench_start = time.time()
    for i in range(requests):
        if i % 100 == 0:
            logging.info(f'On request {i}...')

        inp = random.choice(inputs)

        start = time.time()
        result = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    bench_end = time.time()
    print_latency_stats(latencies, 'E2E', not local, bench_end - bench_start)

    if sckt:
        bts = cp.dumps(latencies)
        sckt.send(bts)
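# Illustrative sketch (added for clarity, not part of the original source):
# the kind of summary print_latency_stats presumably reports for the
# latencies collected above, i.e. mean, median, and tail percentiles, plus
# throughput when a total elapsed time is available. The helper name
# summarize_latencies and its output format are hypothetical.
import numpy as np

def summarize_latencies(latencies, elapsed=None):
    arr = np.array(latencies)
    print('mean: %.4f  median: %.4f  p95: %.4f  p99: %.4f' %
          (arr.mean(), np.percentile(arr, 50),
           np.percentile(arr, 95), np.percentile(arr, 99)))
    if elapsed is not None:
        # Requests are issued sequentially, so throughput is count / wall time.
        print('throughput: %.2f requests/sec' % (len(arr) / elapsed))

# Example: summarize_latencies([0.012, 0.015, 0.011], elapsed=0.038)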
def run(self, cloudburst, lookup_key, dynamic: bool, input_object, inp: Table):
    from flow.types.basic import get_type

    serialized = False
    if type(inp) == bytes:
        inp = deserialize(inp)
        serialized = True

    # For dynamic lookups (or when no client is available), the fetched
    # object arrives as input_object, and the key to label it with is read
    # from the first row; otherwise, fetch the fixed key from the KVS.
    if cloudburst is None or dynamic:
        obj = input_object
        lookup_key = next(inp.get())[lookup_key]
    else:
        obj = cloudburst.get(lookup_key)

    schema = list(inp.schema)
    schema.append((lookup_key, get_type(type(obj))))

    new_table = Table(schema)
    for row in inp.get():
        vals = [row[key] for key, _ in inp.schema]
        vals.append(obj)
        new_table.insert(vals)

    if serialized:
        new_table = serialize(new_table)

    return new_table
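# Minimal standalone sketch of the lookup pattern implemented above, using
# a plain dict in place of the Cloudburst KVS and lists of dicts in place
# of Table (all names here are hypothetical, for illustration only).
# Static lookups fetch a fixed key; dynamic lookups read the key to fetch
# from the first row, mirroring the `cloudburst is None or dynamic` branch.
def lookup_sketch(kvs, rows, lookup_key, dynamic=False):
    # Resolve the key: dynamically from the data, or use it verbatim.
    key = rows[0][lookup_key] if dynamic else lookup_key
    obj = kvs[key]

    # Append the fetched object as a new column on every row.
    return [dict(row, **{key: obj}) for row in rows]

# Example: each row names the object it wants joined in.
kvs = {'data-1': [1.0, 2.0]}
rows = [{'req_num': 3, 'key': 'data-1'}]
print(lookup_sketch(kvs, rows, 'key', dynamic=True))
# [{'req_num': 3, 'key': 'data-1', 'data-1': [1.0, 2.0]}]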
def run(self, _, inp: GroupbyTable):
    # Flatten a grouped table back into a single table by re-inserting
    # every row from every group.
    result = Table(inp.schema)
    for group, gtable in inp.get():
        for row in gtable.get():
            result.insert(row)

    return result
def run(flow, cloudburst, requests, local, sckt=None):
    latencies = []
    if not local:
        print = logging.info

    bench_start = time.time()
    for i in range(requests):
        if i % 100 == 0:
            logging.info(f'On request {i}...')

        inp = Table([('user', StrType), ('recent', NumpyType)])
        uid = np.random.randint(NUM_USERS)
        recent = np.random.randint(0, NUM_PRODUCT_SETS, 5)
        inp.insert([str(uid), recent])

        start = time.time()
        flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    bench_end = time.time()
    print_latency_stats(latencies, 'E2E', not local, bench_end - bench_start)

    if sckt:
        bts = cp.dumps(latencies)
        sckt.send(bts)
def run(self, cloudburst, aggregate, column, inp):
    serialized = False
    if type(inp) == bytes:
        serialized = True
        inp = deserialize(inp)

    # Dispatch to the aggregation function named by the `aggregate` arg.
    if aggregate == 'count':
        aggfn = self.count
    elif aggregate == 'min':
        aggfn = self.min
    elif aggregate == 'max':
        aggfn = self.max
    elif aggregate == 'sum':
        aggfn = self.sum
    elif aggregate == 'average':
        aggfn = self.average

    if isinstance(inp, GroupbyTable):
        # Grouped input: emit one aggregated row per group.
        gb_col = inp.col
        val, _ = next(inp.get())
        gb_typ = get_type(type(val))

        result = Table([(gb_col, gb_typ), (aggregate, FloatType)])
        for val, tbl in inp.get():
            agg = aggfn(tbl, column)
            result.insert([val, float(agg)])
    else:
        # Ungrouped input: reduce the whole table to a single row.
        result = Table([(aggregate, FloatType)])
        result.insert([float(aggfn(inp, column))])  # fixed typo: was `aggnf`

    if serialized:
        result = serialize(result)

    return result
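# Standalone sketch of the aggregate dispatch above, over plain Python
# lists of dicts instead of Table/GroupbyTable (names are hypothetical).
# Grouped inputs produce one value per group, matching the GroupbyTable
# branch above.
def aggregate_sketch(aggregate, column, groups):
    aggfns = {
        'count': lambda vals: len(vals),
        'min': min,
        'max': max,
        'sum': sum,
        'average': lambda vals: sum(vals) / len(vals),
    }
    aggfn = aggfns[aggregate]
    return {group: float(aggfn([row[column] for row in rows]))
            for group, rows in groups.items()}

groups = {'a': [{'val': 1}, {'val': 3}], 'b': [{'val': 2}]}
print(aggregate_sketch('average', 'val', groups))  # {'a': 2.0, 'b': 2.0}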
def run(cloudburst: CloudburstConnection, num_requests: int, data_size: str,
        do_optimize: bool):
    def stage1(self, row: Row) -> bytes:
        import numpy as np
        return np.random.rand(row['size'])

    def stage2(self, row: Row) -> int:
        return 3

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')

    # Fan out one upstream stage into five parallel downstream stages,
    # then join them back together.
    flow = Flow('colocate-benchmark', FlowType.PUSH, cloudburst)
    f1 = flow.map(stage1)
    p1 = f1.map(stage2, names=['val1'])
    p2 = f1.map(stage2, names=['val2'])
    p3 = f1.map(stage2, names=['val3'])
    p4 = f1.map(stage2, names=['val4'])
    p5 = f1.map(stage2, names=['val5'])
    # p6 = f1.map(stage2, names=['val6'])
    # p7 = f1.map(stage2, names=['val7'])
    # p8 = f1.map(stage2, names=['val8'])

    p1.join(p2).join(p3).join(p4).join(p5)  # .join(p6).join(p7).join(p8)

    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    inp = Table([('size', IntType)])
    inp.insert([DATA_SIZES[data_size]])

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    print_latency_stats(latencies, 'E2E')
def run(cloudburst: CloudburstConnection, num_requests: int, gamma: int,
        num_replicas: int):
    def stage1(self, val: int) -> int:
        return val + 1

    def stage2(self, row: Row) -> float:
        import time
        from scipy.stats import gamma

        # Sample a gamma-distributed delay in milliseconds, then convert
        # to seconds for time.sleep.
        delay = gamma.rvs(3.0, scale=row['scale']) * 10 / 1000
        time.sleep(delay)
        return delay

    def stage3(self, row: Row) -> float:
        return row['val']

    print(f'Creating flow with {num_replicas} replicas and'
          + f' gamma={GAMMA_VALS[gamma]}')

    flow = Flow('fusion-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, col='val') \
        .map(stage2, names=['val'], high_variance=True) \
        .map(stage3, names=['val'])

    optimize_rules['compete_replicas'] = num_replicas
    flow = optimize(flow, rules=optimize_rules)
    print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    inp = Table([('val', IntType), ('scale', FloatType)])
    inp.insert([1, GAMMA_VALS[gamma]])

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        time.sleep(.300)  # Sleep to let the queue drain.

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    print_latency_stats(latencies, 'E2E')
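# Back-of-the-envelope sketch (added for illustration, not from the
# original source) of why the compete_replicas rule above helps: taking
# the fastest of k replicas of a high-variance stage cuts the tail of the
# gamma-distributed delay. Uses scipy, like stage2 above; the helper name
# simulated_p99 is made up.
import numpy as np
from scipy.stats import gamma

def simulated_p99(num_replicas, scale, trials=10000):
    # Each request runs on num_replicas competing copies; only the
    # minimum delay matters, mirroring first-response-wins semantics.
    samples = gamma.rvs(3.0, scale=scale, size=(trials, num_replicas))
    return np.percentile(samples.min(axis=1) * 10 / 1000, 99)

for k in (1, 2, 3):
    print(f'replicas={k}: simulated p99 sleep = {simulated_p99(k, 1.0):.4f}s')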
def run(flow, cloudburst, requests, local, sckt=None):
    if not local:
        if not os.path.exists('imagenet_sample.zip'):
            raise RuntimeError(
                'Expect to have the imagenet_sample directory locally.')

        os.system('unzip imagenet_sample.zip')
    else:
        if not os.path.exists('imagenet_sample/imagenet'):
            raise RuntimeError(
                'Expect to have the imagenet_sample directory locally.')

    prefix = 'imagenet_sample/imagenet'
    files = os.listdir(prefix)
    files = [os.path.join(prefix, fname) for fname in files]

    inputs = []
    logging.info('Loading input images...')
    for fname in files:
        table = Table([('img', NumpyType)])
        img = np.array(Image.open(fname).convert('RGB').resize((224, 224)))
        table.insert([img])
        inputs.append(table)

    logging.info('Starting benchmark...')

    latencies = []
    bench_start = time.time()
    for i in range(requests):
        if i % 100 == 0:
            logging.info(f'On request {i}...')

        inp = random.choice(inputs)

        start = time.time()
        result = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    bench_end = time.time()
    print_latency_stats(latencies, 'E2E', not local, bench_end - bench_start)

    if sckt:
        bts = cp.dumps(latencies)
        sckt.send(bts)
def run(cloudburst: CloudburstConnection, num_requests: int, num_fns: int,
        data_size: str, do_optimize: bool):
    def fusion_op(self, row: Row) -> bytes:
        return row['data']

    print(f'Creating flow with {num_fns} operators and {data_size}'
          + f' ({DATA_SIZES[data_size]}) inputs.')

    # Chain num_fns identical pass-through operators; the optimizer can
    # then fuse them into a single stage.
    flow = Flow('fusion-benchmark', FlowType.PUSH, cloudburst)
    marker = flow
    for _ in range(num_fns):
        marker = marker.map(fusion_op, names=['data'])

    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    inp = Table([('data', BtsType)])
    inp.insert([os.urandom(DATA_SIZES[data_size])])

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    print_latency_stats(latencies, 'E2E')
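# Conceptual sketch (hypothetical helper, not the optimizer's actual code)
# of what fusing the chained map operators above buys: one composed
# function call per request instead of one scheduled task per stage, so
# intermediate data is never re-serialized between stages.
from functools import reduce

def fuse(*stages):
    # Compose stages left to right into a single callable.
    return reduce(lambda f, g: (lambda x: g(f(x))), stages)

identity = lambda data: data  # stands in for fusion_op's pass-through body
fused = fuse(*([identity] * 8))
print(fused(b'payload'))  # b'payload', in one call with no per-stage hops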
flow = optimize(flow, rules=optimize_rules)

print('Deploying flow...')
flow.deploy()

local = args.local[0].lower() == 'true'
if local:
    run(flow, cloudburst, args.requests[0], local)
else:
    flow.cloudburst = None  # Hack to serialize and send flow.
    queue = [flow]
    while len(queue) > 0:
        op = queue.pop(0)
        op.cb_fn = None
        queue.extend(op.downstreams)

    sockets = []
    benchmark_ips = []
    with open('benchmarks.txt', 'r') as f:
        benchmark_ips = [line.strip() for line in f.readlines()]

    sample_input = Table([('img', NumpyType)])
    img = np.array(
        Image.open('panda.jpg').convert('RGB').resize((224, 224)))
    sample_input.insert([img])

    run_distributed_benchmark(flow, args.requests[0], 'cascade',
                              args.threads[0], benchmark_ips, sample_input)
    r_index = predict_row['resnet_index']
    r_perc = predict_row['resnet_perc']

    # Average the two models' class probabilities and pick the argmax.
    all_percentages = (a_perc + r_perc) / 2
    indices = np.argsort(all_percentages)[::-1]
    return classes[indices[0]]

import base64
import sys

from cloudburst.client.client import CloudburstConnection

table = Table([('img', StrType)])
img = base64.b64encode(open('panda.jpg', 'rb').read()).decode('ascii')
table.insert([img])

cloudburst = CloudburstConnection(sys.argv[1], '3.226.122.35')

flow = Flow('ensemble-flow', FlowType.PUSH, cloudburst)
img = flow.map(transform, init=transform_init, names=['img'])
anet = img.map(alexnet_model, init=alexnet_init,
               names=['alexnet_index', 'alexnet_perc'])
rnet = img.map(resnet_model, init=resnet_init,
               names=['resnet_index', 'resnet_perc'])
anet.join(rnet).map(ensemble_predict, names=['class'])

flow.deploy()

from cloudburst.server.benchmarks.utils import print_latency_stats
import time

print('Starting benchmark...')
cloudburst.list()

import random
import string

salt = ''.join(random.choices(string.ascii_letters, k=6))

print('Running sanity check')
cloud_sq = cloudburst.register(lambda _, x: x * x, 'square-2' + salt)
print(cloud_sq(2).get())

cloudburst.delete_dag('dag')
cloudburst.register_dag('dag', ['square-2' + salt], [])
print(cloudburst.call_dag('dag', {'square-2' + salt: [2]}).get())

# 1 / 0

print('Running example flow')
dataflow = Flow('example-flow' + salt, FlowType.PUSH, cloudburst)
dataflow.map(map_fn, names=['sum']).filter(filter_fn)

table = Table([('a', IntType), ('b', IntType)])
table.insert([1, 2])
table.insert([1, 3])
table.insert([1, 4])

dataflow.register()
dataflow.deploy()
print(dataflow)
print('deployed')

print(dataflow.run(table).get())
def run(cloudburst: CloudburstConnection, num_requests: int, batch_size: int,
        gpu: bool):
    with open('imagenet_classes.txt', 'r') as f:
        classes = [line.strip() for line in f.readlines()]

    cloudburst.put_object('imagenet-classes', classes)

    def resnet_init_gpu(self, cloudburst):
        import os

        import torch
        import torchvision
        from torchvision import transforms

        tpath = os.path.join(os.getenv('TORCH_HOME'), 'checkpoints')
        self.resnet = torch.load(os.path.join(tpath, 'resnet101.model')).cuda()
        self.resnet.eval()

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        self.classes = cloudburst.get('imagenet-classes')

    def resnet_model_gpu(self, table: Table) -> str:
        """
        ResNet-101 for batched image classification on ImageNet (GPU).
        """
        import torch

        inputs = []
        for row in table.get():
            img = self.transforms(row['img'])
            inputs.append(img)

        # Stack the whole batch into one tensor for a single forward pass.
        inputs = torch.stack(inputs, dim=0).cuda()
        output = self.resnet(inputs)
        _, indices = torch.sort(output, descending=True)
        indices = indices.cpu().detach().numpy()

        result = []
        for idx_set in indices:
            index = idx_set[0]
            result.append(self.classes[index])

        return result

    def resnet_init_cpu(self, cloudburst):
        import os

        import torch
        import torchvision
        from torchvision import transforms

        tpath = os.path.join(os.getenv('TORCH_HOME'), 'checkpoints')
        self.resnet = torch.load(os.path.join(tpath, 'resnet101.model'))
        self.resnet.eval()

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        self.classes = cloudburst.get('imagenet-classes')

    def resnet_model_cpu(self, table: Table) -> str:
        """
        ResNet-101 for batched image classification on ImageNet (CPU).
        """
        import torch

        inputs = []
        for row in table.get():
            img = self.transforms(row['img'])
            inputs.append(img)

        inputs = torch.stack(inputs, dim=0)
        output = self.resnet(inputs)
        _, indices = torch.sort(output, descending=True)
        indices = indices.detach().numpy()

        result = []
        for idx_set in indices:
            index = idx_set[0]
            result.append(self.classes[index])

        return result

    print(f'Creating flow with batch size {batch_size}.')

    flow = Flow('batching-benchmark', FlowType.PUSH, cloudburst)
    if gpu:
        flow.map(resnet_model_gpu, init=resnet_init_gpu, names=['class'],
                 gpu=True, batching=True)
    else:
        flow.map(resnet_model_cpu, init=resnet_init_cpu, names=['class'],
                 batching=True)

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    inp = Table([('img', NumpyType)])
    img = np.array(Image.open('panda.jpg').convert('RGB').resize((224, 224)))
    inp.insert([img])

    kvs = cloudburst.kvs_client

    if gpu:
        print('Starting GPU warmup...')
        for _ in range(50):
            flow.run(inp).get()

        print('Finished warmup...')

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        futs = []
        for _ in range(batch_size):
            futs.append(flow.run(inp))

        pending = set([fut.obj_id for fut in futs])

        # Poll the KVS directly so result fetches are batched into
        # multi-key get requests, instead of resolving each future
        # individually.
        start = time.time()
        while len(pending) > 0:
            response = kvs.get(list(pending))
            for key in response:
                if response[key] is not None:
                    pending.discard(key)
        end = time.time()

        latencies.append(end - start)

    compute_time = np.mean(latencies) * num_requests
    tput = (batch_size * num_requests) / compute_time

    print('THROUGHPUT: %.2f' % (tput))
    print_latency_stats(latencies, 'E2E')
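# Standalone sketch of the polling loop above: issue a batch of requests,
# then repeatedly ask the KVS for all still-pending result keys in one
# multi-get until every result lands. The dict-backed FakeKVS below is a
# stand-in for cloudburst.kvs_client, for illustration only.
class FakeKVS:
    def __init__(self, ready):
        self.ready = ready  # key -> result, populated as work finishes

    def get(self, keys):
        # Multi-get: unresolved keys come back as None.
        return {key: self.ready.get(key) for key in keys}

def wait_for_batch(kvs, obj_ids):
    pending = set(obj_ids)
    results = {}
    while pending:
        response = kvs.get(list(pending))
        for key, val in response.items():
            if val is not None:
                results[key] = val
                pending.discard(key)
    return results

kvs = FakeKVS({'req-0': 'giant panda', 'req-1': 'giant panda'})
print(wait_for_batch(kvs, ['req-0', 'req-1']))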
def run(self, _, on, how, left, right):
    serialized = False
    if type(left) == bytes:
        left = deserialize(left)
        right = deserialize(right)
        serialized = True

    # Note: We currently don't support batching with custom serialization
    # for joins. This shouldn't be hard to implement, but we skip it for
    # expediency.
    batching = False
    if type(left) == list:
        batching = True
        _, left = merge_tables(left)
        mappings, right = merge_tables(right)

    new_schema = merge_schema(left.schema, right.schema)
    result = Table(new_schema)

    ljoin = (how == 'left')
    ojoin = (how == 'outer')

    # Track which right rows have been matched, so outer joins can emit
    # the unmatched ones afterwards.
    rindex_map = {}

    for lrow in left.get():
        lrow_inserted = False

        idx = 0
        for rrow in right.get():
            if lrow[on] == rrow[on]:
                new_row = merge_row(lrow, rrow, new_schema)
                result.insert(new_row)

                lrow_inserted = True
                rindex_map[idx] = True

            idx += 1

        # For left and outer joins, emit unmatched left rows padded with
        # nulls on the right.
        if not lrow_inserted and (ljoin or ojoin):
            rvals = [None] * len(right.schema)
            rrow = Row(right.schema, rvals, lrow[Row.qid_key])

            new_row = merge_row(lrow, rrow, new_schema)
            result.insert(new_row)

    # For outer joins, emit unmatched right rows padded with nulls on the
    # left.
    if ojoin:
        idx = 0
        for row in right.get():
            if idx not in rindex_map:
                lvals = [None] * len(left.schema)
                lrow = Row(left.schema, lvals, row[Row.qid_key])

                new_row = merge_row(lrow, row, new_schema)
                result.insert(new_row)

            idx += 1

    if serialized:
        result = serialize(result)

    if batching:
        result = demux_tables(result, mappings)

    return result
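# Standalone sketch of the nested-loop join semantics implemented above,
# over lists of dicts instead of Table/Row (the helper name is made up,
# for illustration). Left joins null-pad unmatched left rows; outer joins
# additionally emit right rows that never matched.
def nested_loop_join(left, right, on, how='inner'):
    rkeys = [k for k in (right[0].keys() if right else []) if k != on]
    matched_right = set()
    result = []
    for lrow in left:
        hit = False
        for idx, rrow in enumerate(right):
            if lrow[on] == rrow[on]:
                result.append({**lrow, **rrow})
                hit = True
                matched_right.add(idx)
        if not hit and how in ('left', 'outer'):
            result.append({**lrow, **{k: None for k in rkeys}})
    if how == 'outer':
        lkeys = [k for k in (left[0].keys() if left else []) if k != on]
        for idx, rrow in enumerate(right):
            if idx not in matched_right:
                result.append({**{k: None for k in lkeys}, **rrow})
    return result

left = [{'id': 1, 'a': 'x'}, {'id': 2, 'a': 'y'}]
right = [{'id': 1, 'b': 'u'}, {'id': 3, 'b': 'v'}]
print(nested_loop_join(left, right, 'id', how='outer'))
# [{'id': 1, 'a': 'x', 'b': 'u'}, {'id': 2, 'a': 'y', 'b': None},
#  {'id': 3, 'a': None, 'b': 'v'}]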
# product_set = np.random.randn(2500, 512)
# key = 'category-' + str(i)
# cloudburst.put_object(key, product_set)

print('Deploying flow...')
flow.deploy()

print('Starting warmup phase...')
for i in range(NUM_PRODUCT_SETS):
    if i % 100 == 0:
        print(f'On warmup {i}...')

    uid = np.random.randint(NUM_USERS)
    recent = np.array([i, 0, 0, 0, 0])

    inp = Table([('user', StrType), ('recent', NumpyType)])
    inp.insert([str(uid), recent])

    flow.run(inp).get()

print('Starting benchmark...')
local = args.local[0].lower() == 'true'
if local:
    run(flow, cloudburst, args.requests[0], local)
else:
    flow.cloudburst = None  # Hack to serialize and send flow.
    queue = [flow]
    while len(queue) > 0:
        op = queue.pop(0)
        op.cb_fn = None
def run(cloudburst: CloudburstConnection, num_requests: int, data_size: str,
        breakpoint: bool, do_optimize: bool):
    print('Creating data...')
    size = DATA_SIZES[data_size]
    for i in range(1, NUM_DATA_POINTS + 1):
        arr = np.random.rand(size)
        cloudburst.put_object('data-' + str(i), arr)

    def stage1(self, row: Row) -> (int, str):
        idx = int(row['req_num'] / 10) + 1
        key = 'data-%d' % (idx)
        return idx, key

    def stage2(self, row: Row) -> float:  # fixed annotation: returns a float
        import numpy as np
        arr = row[row['key']]
        return float(np.sum(arr))

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')

    flow = Flow('locality-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, names=['index', 'key']) \
        .lookup('key', dynamic=True) \
        .map(stage2, names=['sum'])

    optimize_rules['breakpoint'] = breakpoint
    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []
    inp = Table([('req_num', IntType)])

    if breakpoint:
        # Touch every data point once so each object lands in a cache.
        print('Starting warmup...')
        for i in range(NUM_DATA_POINTS):
            inp = Table([('req_num', IntType)])
            inp.insert([i * 10])

            res = flow.run(inp).get()

        print('Pausing to let cache metadata propagate...')
        time.sleep(15)

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        inp = Table([('req_num', IntType)])
        inp.insert([i])

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    with open('data.bts', 'wb') as f:
        from cloudburst.shared.serializer import Serializer
        ser = Serializer()

        bts = ser.dump(latencies)
        f.write(bts)

    print_latency_stats(latencies, 'E2E')
args = parser.parse_args()

benchmark_ips = []
with open(args.benchmarks[0], 'r') as f:
    benchmark_ips = f.readlines()

cloudburst = CloudburstConnection(args.cloudburst[0], args.ip[0])
print('Successfully connected to Cloudburst')

flow = Flow('scaling-benchmark', FlowType.PUSH, cloudburst)
flow.map(stage1, names=['val']).map(stage2, names=['val'])

table = Table([('val', IntType)])
table.insert([1])

num_bench = len(benchmark_ips)
num_start = int(start_percent * num_bench)

flow.cloudburst = None  # Hack to serialize and send flow.
queue = [flow]
while len(queue) > 0:
    op = queue.pop(0)
    op.cb_fn = None
    queue.extend(op.downstreams)

flow = cp.dumps(flow)

sockets = []
flow = optimize(flow, rules=optimize_rules)

print('Deploying flow...')
flow.deploy()

print('Starting benchmark...')
local = args.local[0].lower() == 'true'
if local:
    run(flow, cloudburst, args.requests[0], local)
else:
    flow.cloudburst = None  # Hack to serialize and send flow.
    queue = [flow]
    while len(queue) > 0:
        op = queue.pop(0)
        op.cb_fn = None
        queue.extend(op.downstreams)

    sockets = []
    benchmark_ips = []
    with open('benchmarks.txt', 'r') as f:
        benchmark_ips = [line.strip() for line in f.readlines()]

    sample_input = Table([('classify', StrType), ('translate', StrType)])
    sample_input.insert(['Je m\'appelle Pierre.', 'How are you?'])

    run_distributed_benchmark(flow, args.requests[0], 'nmt', args.threads[0],
                              benchmark_ips, sample_input)