def test_exec_with_ordered_set(self): ''' Tests a single function execution with an ordered set input as an argument to validate that ordered sets are correctly handled. ''' def func(_, x): return len(x) >= 2 and x[0] < x[1] fname = 'set_order' arg_value = [2, 3] arg_name = 'set' self.kvs_client.put(arg_name, serializer.dump_lattice(arg_value)) # Put the function into the KVS and create a function call. create_function(func, self.kvs_client, fname) call = self._create_function_call(fname, [DropletReference(arg_name, True)], NORMAL) self.socket.inbox.append(call.SerializeToString()) # Execute the function call. exec_function(self.socket, self.kvs_client, self.user_library, {}) # Assert that there have been 0 messages sent. self.assertEqual(len(self.socket.outbox), 0) # Retrieve the result, ensure it is a LWWPairLattice, then deserialize # it. result = self.kvs_client.get(self.response_key)[self.response_key] self.assertEqual(type(result), LWWPairLattice) result = serializer.load_lattice(result) # Check that the output is equal to a local function execution. self.assertEqual(result, func('', arg_value))
def test_call_function_with_refs(self):
    '''
    Creates a scenario where the policy should deterministically pick the
    same executor to run a request on: There is one reference, and it's
    cached only on the node we create in this test.
    '''
    # Add a new executor for which we will construct cached references.
    ip_address = '192.168.0.1'
    new_key = (ip_address, 2)
    self.policy.unpinned_executors.add(new_key)

    # Create a new reference and add its metadata.
    ref_name = 'reference'
    self.policy.key_locations[ref_name] = [ip_address]

    # Create a function call that asks for this reference.
    call = FunctionCall()
    call.name = 'function'
    call.request_id = 12

    val = call.arguments.values.add()
    serializer.dump(DropletReference(ref_name, True), val)
    self.socket.inbox.append(call.SerializeToString())

    # Execute the scheduling policy.
    call_function(self.socket, self.pusher_cache, self.policy)

    # Check that the correct number of messages were sent.
    self.assertEqual(len(self.socket.outbox), 1)
    self.assertEqual(len(self.pusher_cache.socket.outbox), 1)

    # Extract and deserialize the messages.
    response = GenericResponse()
    forwarded = FunctionCall()
    response.ParseFromString(self.socket.outbox[0])
    forwarded.ParseFromString(self.pusher_cache.socket.outbox[0])

    self.assertTrue(response.success)
    self.assertEqual(response.response_id, forwarded.response_key)
    self.assertEqual(forwarded.name, call.name)
    self.assertEqual(forwarded.request_id, call.request_id)

    # Make sure that the correct executor was chosen.
    self.assertEqual(len(self.pusher_cache.addresses), 1)
    self.assertEqual(self.pusher_cache.addresses[0],
                     utils.get_exec_address(*new_key))
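# Illustrative sketch (not the project's scheduling policy): a minimal
# locality-aware pick that mirrors what the test above exercises. Executors
# are (ip, thread) pairs, `key_locations` maps a key to the IPs caching it,
# and we favor the executor whose IP caches the most of the call's
# references. All names here are hypothetical stand-ins.
def pick_executor_sketch(executors, key_locations, refs):
    def cached_count(ip):
        return sum(1 for ref in refs if ip in key_locations.get(ref, []))

    return max(executors, key=lambda executor: cached_count(executor[0]))

# With one reference cached only at 192.168.0.1, that node always wins:
# pick_executor_sketch({('192.168.0.1', 2), ('10.0.0.5', 0)},
#                      {'reference': ['192.168.0.1']}, ['reference'])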
def test_exec_func_with_causal_ref(self): ''' Tests a function execution where the argument is a reference to the KVS in causal mode. Ensures that the result has the correct causal dependencies and metadata. ''' # Create the function and serialize it into a lattice. def func(_, x): return x * x fname = 'square' create_function(func, self.kvs_client, fname, SingleKeyCausalLattice) # Put an argument value into the KVS. arg_value = 2 arg_name = 'key' self.kvs_client.put( arg_name, serializer.dump_lattice(arg_value, MultiKeyCausalLattice)) # Create and serialize the function call. call = self._create_function_call(fname, [DropletReference(arg_name, True)], MULTI) self.socket.inbox.append(call.SerializeToString()) # Execute the function call. exec_function(self.socket, self.kvs_client, self.user_library, {}) # Assert that there have been 0 messages sent. self.assertEqual(len(self.socket.outbox), 0) # Retrieve the result, ensure it is a MultiKeyCausalLattice, then # deserialize it. result = self.kvs_client.get(self.response_key)[self.response_key] self.assertEqual(type(result), MultiKeyCausalLattice) self.assertEqual(result.vector_clock, DEFAULT_VC) self.assertEqual(len(result.dependencies.reveal()), 1) self.assertTrue(arg_name in result.dependencies.reveal()) self.assertEqual(result.dependencies.reveal()[arg_name], DEFAULT_VC) result = serializer.load_lattice(result)[0] # Check that the output is equal to a local function execution. self.assertEqual(result, func('', arg_value))
def dump(self, data, valobj=None, serialize=True): if not valobj: valobj = Value() # If we are attempting to pass a future into another function, we # simply turn it into a reference because the runtime knows how to # automatically resolve it. if isinstance(data, future.DropletFuture): valobj.body = self._dump_default( DropletReference(data.obj_id, True)) valobj.type = DEFAULT elif isinstance(data, np.ndarray): valobj.body = self._dump_numpy(data) valobj.type = NUMPY else: valobj.body = self._dump_default(data) valobj.type = DEFAULT if not serialize: return valobj return valobj.SerializeToString()
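# Round-trip usage sketch for the `dump` method above, assuming `serializer`
# is an instance of this class and that the matching `serializer.load`
# inverse (the one the tests call on trigger arguments) accepts a Value
# produced with serialize=False:
#
#   arr = np.arange(4).reshape(2, 2)
#   val = serializer.dump(arr, serialize=False)   # Value with type NUMPY
#   wire_bytes = serializer.dump(arr)             # serialized bytes for the wire
#   ref_val = serializer.dump(DropletReference('some-key', True),
#                             serialize=False)    # references get type DEFAULT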
def test_exec_func_with_ref(self): ''' Tests a function execution where the argument is a reference to the KVS in normal mode. ''' # Create the function and serialize it into a lattice. def func(_, x): return x * x fname = 'square' create_function(func, self.kvs_client, fname) # Put an argument value into the KVS. arg_value = 2 arg_name = 'key' self.kvs_client.put(arg_name, serializer.dump_lattice(arg_value)) # Create and serialize the function call. call = self._create_function_call(fname, [DropletReference(arg_name, True)], NORMAL) self.socket.inbox.append(call.SerializeToString()) # Execute the function call. exec_function(self.socket, self.kvs_client, self.user_library, {}) # Assert that there have been 0 messages sent. self.assertEqual(len(self.socket.outbox), 0) # Retrieve the result, ensure it is a LWWPairLattice, then deserialize # it. result = self.kvs_client.get(self.response_key)[self.response_key] self.assertEqual(type(result), LWWPairLattice) result = serializer.load_lattice(result) # Check that the output is equal to a local function execution. self.assertEqual(result, func('', arg_value))
def run(droplet_client, num_requests, sckt): ''' UPLOAD THE MODEL OBJECT ''' model_key = 'mobilenet-model' label_key = 'mobilenet-label-map' with open('model/label_map.json', 'rb') as f: bts = f.read() lattice = LWWPairLattice(0, bts) droplet_client.kvs_client.put(label_key, lattice) with open('model/mobilenet_v2_1.4_224_frozen.pb', 'rb') as f: bts = f.read() lattice = LWWPairLattice(0, bts) droplet_client.kvs_client.put(model_key, lattice) ''' DEFINE AND REGISTER FUNCTIONS ''' def preprocess(droplet, inp): from skimage import filters return filters.gaussian(inp).reshape(1, 224, 224, 3) class Mobilenet: def __init__(self, droplet, model_key, label_map_key): import tensorflow as tf import json tf.enable_eager_execution() self.model = droplet.get(model_key, deserialize=False) self.label_map = json.loads( droplet.get(label_map_key, deserialize=False)) self.gd = tf.GraphDef.FromString(self.model) self.inp, self.predictions = tf.import_graph_def( self.gd, return_elements=[ 'input:0', 'MobilenetV2/Predictions/Reshape_1:0' ]) def run(self, droplet, img): # load libs import tensorflow as tf from PIL import Image from io import BytesIO import base64 import numpy as np import json tf.enable_eager_execution() # load image and model # img = np.array(Image.open(BytesIO(base64.b64decode(img))).resize((224, 224))).astype(np.float) / 128 - 1 with tf.Session(graph=self.inp.graph): x = self.predictions.eval(feed_dict={self.inp: img}) return x def average(droplet, inp): import numpy as np inp = [ inp, ] return np.mean(inp, axis=0) cloud_prep = droplet_client.register(preprocess, 'preprocess') cloud_mnet = droplet_client.register((Mobilenet, (model_key, label_key)), 'mnet') cloud_average = droplet_client.register(average, 'average') if cloud_prep and cloud_mnet and cloud_average: print('Successfully registered preprocess, mnet, and average ' + 'functions.') else: sys.exit(1) ''' TEST REGISTERED FUNCTIONS ''' arr = np.random.randn(1, 224, 224, 3) prep_test = cloud_prep(arr).get() if type(prep_test) != np.ndarray: print('Unexpected result from preprocess(arr): %s' % (str(prep_test))) sys.exit(1) mnet_test = cloud_mnet(prep_test).get() if type(mnet_test) != np.ndarray: print('Unexpected result from mobilenet(arr): %s' % (str(mnet_test))) sys.exit(1) average_test = cloud_average(mnet_test).get() if type(average_test) != np.ndarray: print('Unexpected result from average(arr): %s' % (str(average_test))) sys.exit(1) print('Successfully tested functions!') ''' CREATE DAG ''' dag_name = 'mnet' functions = ['preprocess', 'mnet', 'average'] connections = [('preprocess', 'mnet'), ('mnet', 'average')] success, error = droplet_client.register_dag(dag_name, functions, connections) if not success: print('Failed to register DAG: %s' % (str(error))) sys.exit(1) ''' RUN DAG ''' total_time = [] # Create all the input data oids = [] for _ in range(num_requests): arr = np.random.randn(1, 224, 224, 3) oid = str(uuid.uuid4()) oids.append(oid) droplet_client.put_object(oid, arr) for i in range(num_requests): oid = oids[i] arg_map = {'preprocess': [DropletReference(oid, True)]} start = time.time() droplet_client.call_dag(dag_name, arg_map, True) end = time.time() total_time += [end - start] if sckt: sckt.send(cp.dumps(total_time)) return total_time, [], [], 0
def run(droplet_client, num_requests, sckt):
    ''' DEFINE AND REGISTER FUNCTIONS '''
    def summa(droplet, uid, lblock, rblock, rid, cid, numrows, numcols):
        # Imports happen inside the function so that they are available
        # wherever the executor runs this code.
        import cloudpickle as cp
        import logging
        import numpy as np

        bsize = lblock.shape[0]
        ssize = 100
        res = np.zeros((bsize, bsize))

        myid = droplet.getid()
        key = '%s: (%d, %d)' % (uid, rid, cid)
        droplet.put(key, myid)

        proc_locs = {}

        keyset = []
        idset = {}
        for i in range(numrows):
            if i == rid:
                continue
            key = '%s: (%d, %d)' % (uid, i, cid)
            keyset.append(key)
            idset[key] = (i, cid)

        for j in range(numcols):
            if j == cid:
                continue
            key = '%s: (%d, %d)' % (uid, rid, j)
            keyset.append(key)
            idset[key] = (rid, j)

        locs = droplet.get(keyset)
        while None in locs.values():
            locs = droplet.get(keyset)

        for key in locs:
            loc = idset[key]
            proc_locs[loc] = locs[key]

        for c in range(numcols):
            if c == cid:
                continue
            for k in range(int(bsize / ssize)):
                dest = proc_locs[(rid, c)]
                send_id = ('l', k + (bsize * cid))
                msg = cp.dumps(
                    (send_id, lblock[:, (k * ssize):((k + 1) * ssize)]))
                droplet.send(dest, msg)

        for r in range(numrows):
            if r == rid:
                continue
            for k in range(int(bsize / ssize)):
                dest = proc_locs[(r, cid)]
                send_id = ('r', k + (bsize * rid))
                msg = cp.dumps(
                    (send_id, rblock[(k * ssize):((k + 1) * ssize), :]))
                droplet.send(dest, msg)

        num_recvs = (((numrows - 1) * bsize) / ssize) * 2
        recv_count = 0
        left_recvs = {}
        right_recvs = {}

        for l in range(int(bsize / ssize)):
            left_recvs[l + (bsize * cid)] = \
                lblock[:, (l * ssize):((l + 1) * ssize)]

        for r in range(int(bsize / ssize)):
            right_recvs[r + (bsize * rid)] = \
                rblock[(r * ssize):((r + 1) * ssize), :]

        while recv_count < num_recvs:
            msgs = droplet.recv()
            recv_count += len(msgs)

            for msg in msgs:
                _, body = msg
                body = cp.loads(body)
                send_id = body[0]

                if send_id[0] == 'l':
                    col = body[1]
                    key = send_id[1]
                    left_recvs[key] = col

                    if key in right_recvs:
                        match_vec = right_recvs[key]
                        res = np.add(np.matmul(col, match_vec), res)

                        del right_recvs[key]
                        del left_recvs[key]

                if send_id[0] == 'r':
                    row = body[1]
                    key = send_id[1]
                    right_recvs[key] = row

                    if key in left_recvs:
                        match_vec = left_recvs[key]
                        res = np.add(np.matmul(match_vec, row), res)

                        del right_recvs[key]
                        del left_recvs[key]

        for key in left_recvs:
            left = left_recvs[key]
            right = right_recvs[key]
            logging.info(left.shape)
            logging.info(right.shape)
            res = np.add(res, np.matmul(left, right))

        return res

    cloud_summa = droplet_client.register(summa, 'summa')
    if cloud_summa:
        print('Successfully registered summa function.')
    else:
        sys.exit(1)

    ''' TEST REGISTERED FUNCTIONS '''
    n = 10000
    inp1 = np.random.randn(n, n)
    inp2 = np.random.randn(n, n)
    nt = 5
    nr = nt
    nc = nt
    bsize = int(n / nr)

    def get_block(arr, row, col, bsize):
        row_start = row * bsize
        row_end = (row + 1) * bsize
        col_start = col * bsize
        col_end = (col + 1) * bsize

        return arr[row_start:row_end, col_start:col_end]

    latencies = []
    for _ in range(num_requests):
        time.sleep(.1)
        uid = str(uuid.uuid4())
        rids = {}
        left_id_map = {}
        right_id_map = {}

        for r in range(nr):
            for c in range(nc):
                lblock = get_block(inp1, r, c, bsize)
                rblock = get_block(inp2, r, c, bsize)

                id1 = str(uuid.uuid4())
                id2 = str(uuid.uuid4())
                droplet_client.put_object(id1, lblock)
                droplet_client.put_object(id2, rblock)

                left_id_map[(r, c)] = id1
                right_id_map[(r, c)] = id2

        start = time.time()
        for r in range(nr):
            for c in range(nc):
                r1 = DropletReference(left_id_map[(r, c)], True)
                r2 = DropletReference(right_id_map[(r, c)], True)
                rids[(r, c)] = cloud_summa(uid, r1, r2, r, c, nr, nc)
        end = time.time()
        print('Scheduling took %.6f seconds.' % (end - start))
        result = np.zeros((n, n))

        for key in rids:
            # summa returns the computed (bsize x bsize) output block for
            # this (row, column) coordinate.
            res = rids[key].get()

            r = key[0]
            c = key[1]

            result[(r * bsize):((r + 1) * bsize),
                   (c * bsize):((c + 1) * bsize)] = res

        end = time.time()
        latencies.append(end - start)

        if not np.allclose(result, np.matmul(inp1, inp2)):
            print('Failure!')

    return latencies, [], [], 0
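# Standalone sketch of the blocking identity SUMMA relies on: block (r, c) of
# A @ B equals the sum over k of A[r, k] @ B[k, c]. Plain numpy, no Droplet;
# `n`, `nt`, and the block slicing mirror the driver above.
import numpy as np

def summa_local_check(n=8, nt=2):
    bsize = n // nt
    A, B = np.random.randn(n, n), np.random.randn(n, n)

    def block(M, r, c):
        return M[r * bsize:(r + 1) * bsize, c * bsize:(c + 1) * bsize]

    C = np.zeros((n, n))
    for r in range(nt):
        for c in range(nt):
            # Each output block accumulates nt partial block products.
            acc = sum(block(A, r, k) @ block(B, k, c) for k in range(nt))
            C[r * bsize:(r + 1) * bsize, c * bsize:(c + 1) * bsize] = acc

    assert np.allclose(C, A @ B)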
def test_exec_causal_dag_non_sink_with_ref(self):
    '''
    Creates and executes a non-sink function in a causal-mode DAG. This
    version accesses a KVS key, so we ensure that data is appropriately
    cached and the metadata is passed downstream.
    '''
    # Create two functions intended to be used in sequence.
    def incr(_, x):
        return x + 1
    iname = 'incr'

    def square(_, x):
        return x * x
    sname = 'square'

    # Put the argument into the KVS.
    arg_name = 'arg'
    arg_value = 1
    arg = serializer.dump_lattice(arg_value, MultiKeyCausalLattice)
    self.kvs_client.put(arg_name, arg)

    # Create a DAG and a trigger for the first function in the DAG.
    dag = create_linear_dag([incr, square], [iname, sname],
                            self.kvs_client, 'dag', MultiKeyCausalLattice)
    schedule, triggers = self._create_fn_schedule(
        dag, DropletReference(arg_name, True), iname, [iname, sname],
        MULTI)

    exec_dag_function(self.pusher_cache, self.kvs_client, triggers, incr,
                      schedule, self.user_library, {}, {})

    # Assert that there has been a message sent.
    self.assertEqual(len(self.pusher_cache.socket.outbox), 1)

    # Extract that message and check its contents.
    trigger = DagTrigger()
    trigger.ParseFromString(self.pusher_cache.socket.outbox[0])
    self.assertEqual(trigger.id, schedule.id)
    self.assertEqual(trigger.target_function, sname)
    self.assertEqual(trigger.source, iname)
    self.assertEqual(len(trigger.arguments.values), 1)

    # Check the metadata of the key that is cached here after execution.
    locs = trigger.version_locations
    self.assertEqual(len(locs), 1)
    self.assertTrue(self.ip in locs.keys())
    self.assertEqual(len(locs[self.ip].keys), 1)
    kv = locs[self.ip].keys[0]
    self.assertEqual(kv.key, arg_name)
    self.assertEqual(VectorClock(dict(kv.vector_clock), True),
                     arg.vector_clock)

    # Check the metadata of the causal dependency passed downstream.
    self.assertEqual(len(trigger.dependencies), 1)
    kv = trigger.dependencies[0]
    self.assertEqual(kv.key, arg_name)
    self.assertEqual(VectorClock(dict(kv.vector_clock), True),
                     arg.vector_clock)

    val = serializer.load(trigger.arguments.values[0])
    self.assertEqual(val, incr('', arg_value))
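# Illustrative sketch of the pairwise-max merge that causal metadata like the
# vector clocks checked above relies on; a hypothetical stand-in written over
# plain dicts, not the project's VectorClock class.
def vc_merge_sketch(vc_a, vc_b):
    merged = dict(vc_a)
    for node, count in vc_b.items():
        merged[node] = max(merged.get(node, 0), count)
    return merged

# vc_merge_sketch({'a': 2, 'b': 1}, {'b': 3}) == {'a': 2, 'b': 3}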
def run(droplet_client, num_requests, sckt):
    ''' DEFINE AND REGISTER FUNCTIONS '''
    def preprocess(droplet, inp):
        from skimage import filters
        return filters.gaussian(inp).reshape(1, 3, 224, 224)

    def sqnet(droplet, inp):
        import torch
        import torchvision

        model = torchvision.models.squeezenet1_1()
        return model(torch.tensor(inp.astype(np.float32))).detach().numpy()

    def average(droplet, inp1, inp2, inp3):
        import numpy as np
        inp = [inp1, inp2, inp3]
        return np.mean(inp, axis=0)

    cloud_prep = droplet_client.register(preprocess, 'preprocess')
    cloud_sqnet1 = droplet_client.register(sqnet, 'sqnet1')
    cloud_sqnet2 = droplet_client.register(sqnet, 'sqnet2')
    cloud_sqnet3 = droplet_client.register(sqnet, 'sqnet3')
    cloud_average = droplet_client.register(average, 'average')

    if cloud_prep and cloud_sqnet1 and cloud_sqnet2 and cloud_sqnet3 and \
            cloud_average:
        print('Successfully registered preprocess, sqnet, and average ' +
              'functions.')
    else:
        sys.exit(1)

    ''' TEST REGISTERED FUNCTIONS '''
    arr = np.random.randn(1, 224, 224, 3)
    prep_test = cloud_prep(arr).get()
    if type(prep_test) != np.ndarray:
        print('Unexpected result from preprocess(arr): %s' %
              (str(prep_test)))
        sys.exit(1)

    sqnet_test1 = cloud_sqnet1(prep_test).get()
    if type(sqnet_test1) != np.ndarray:
        print('Unexpected result from squeezenet1(arr): %s' %
              (str(sqnet_test1)))
        sys.exit(1)

    sqnet_test2 = cloud_sqnet2(prep_test).get()
    if type(sqnet_test2) != np.ndarray:
        print('Unexpected result from squeezenet2(arr): %s' %
              (str(sqnet_test2)))
        sys.exit(1)

    sqnet_test3 = cloud_sqnet3(prep_test).get()
    if type(sqnet_test3) != np.ndarray:
        print('Unexpected result from squeezenet3(arr): %s' %
              (str(sqnet_test3)))
        sys.exit(1)

    average_test = cloud_average(sqnet_test1, sqnet_test2,
                                 sqnet_test3).get()
    if type(average_test) != np.ndarray:
        print('Unexpected result from average(arr): %s' %
              (str(average_test)))
        sys.exit(1)

    print('Successfully tested functions!')

    ''' CREATE DAG '''
    dag_name = 'pred_serving'

    functions = ['preprocess', 'sqnet1', 'sqnet2', 'sqnet3', 'average']
    connections = [('preprocess', 'sqnet1'), ('preprocess', 'sqnet2'),
                   ('preprocess', 'sqnet3'), ('sqnet1', 'average'),
                   ('sqnet2', 'average'), ('sqnet3', 'average')]
    success, error = droplet_client.register_dag(dag_name, functions,
                                                 connections)

    if not success:
        print('Failed to register DAG: %s' % (str(error)))
        sys.exit(1)

    ''' RUN DAG '''
    total_time = []

    # Create all the input data.
    oids = []
    for _ in range(num_requests):
        arr = np.random.randn(1, 224, 224, 3)
        oid = str(uuid.uuid4())
        oids.append(oid)

        droplet_client.put_object(oid, arr)

    for i in range(num_requests):
        oid = oids[i]

        arg_map = {'preprocess': [DropletReference(oid, True)]}

        start = time.time()
        droplet_client.call_dag(dag_name, arg_map, True)
        end = time.time()

        total_time += [end - start]

    if sckt:
        sckt.send(cp.dumps(total_time))

    return total_time, [], [], 0
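# Sketch of the dataflow that the `connections` list above encodes, written
# as a plain local composition with no Droplet involved. The three function
# arguments are assumed to be the `preprocess`, `sqnet`, and `average`
# callables defined inside run() above; the `droplet` parameter they ignore
# locally is passed as None.
def run_dag_locally(preprocess, sqnet, average, inp):
    prep = preprocess(None, inp)
    # preprocess fans out to three squeezenet replicas...
    outs = [sqnet(None, prep) for _ in range(3)]
    # ...which fan back in at average.
    return average(None, *outs)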
def run(droplet_client, num_requests, create, sckt): dag_name = 'locality' kvs_key = 'LOCALITY_OIDS' if create: ''' DEFINE AND REGISTER FUNCTIONS ''' def dot(droplet, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10): import numpy as np s1 = np.add(v1, v2) s2 = np.add(v3, v4) s3 = np.add(v5, v6) s4 = np.add(v7, v8) s5 = np.add(v9, v10) s1 = np.add(s1, s2) s2 = np.add(s3, s4) s1 = np.add(s1, s2) s1 = np.add(s1, s5) return np.average(s1) cloud_dot = droplet_client.register(dot, 'dot') if cloud_dot: logging.info('Successfully registered the dot function.') else: sys.exit(1) ''' TEST REGISTERED FUNCTIONS ''' refs = () for _ in range(10): inp = np.zeros(OSIZE) k = str(uuid.uuid4()) droplet_client.put_object(k, inp) refs += (DropletReference(k, True), ) dot_test = cloud_dot(*refs).get() if dot_test != 0.0: print('Unexpected result from dot(v1, v2): %s' % (str(dot_test))) sys.exit(1) logging.info('Successfully tested function!') ''' CREATE DAG ''' functions = ['dot'] connections = [] success, error = droplet_client.register_dag(dag_name, functions, connections) if not success and error != DAG_ALREADY_EXISTS: print('Failed to register DAG: %s' % (DropletError.Name(error))) sys.exit(1) # for the hot version oid = str(uuid.uuid4()) arr = np.random.randn(OSIZE) droplet_client.put_object(oid, arr) droplet_client.put_object(kvs_key, [oid]) return [], [], [], 0 else: ''' RUN DAG ''' # num_data_objects = num_requests * 10 # for the cold version # oids = [] # for i in range(num_data_objects): # if i % 100 == 0: # logging.info('On object %d.' % (i)) # array = np.random.rand(OSIZE) # oid = str(uuid.uuid4()) # droplet_client.put_object(oid, array) # oids.append(oid) # logging.info('Finished creating data!') # for the hot version oids = droplet_client.get_object(kvs_key) total_time = [] scheduler_time = [] kvs_time = [] retries = 0 log_start = time.time() log_epoch = 0 epoch_total = [] for i in range(num_requests): refs = [] # for ref in oids[(i * 10):(i * 10) + 10]: # for the cold version # refs.append(DropletReference(ref, True)) for _ in range(10): # for the hot version refs.append(DropletReference(oids[0], True)) start = time.time() arg_map = {'dot': refs} droplet_client.call_dag(dag_name, arg_map, True) end = time.time() epoch_total.append(end - start) total_time.append(end - start) log_end = time.time() if (log_end - log_start) > 10: if sckt: sckt.send(cp.dumps(epoch_total)) utils.print_latency_stats(epoch_total, 'EPOCH %d E2E' % (log_epoch), True) epoch_total.clear() log_epoch += 1 log_start = time.time() return total_time, scheduler_time, kvs_time, retries
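# Hedged sketch of the per-epoch latency summary that
# `utils.print_latency_stats` presumably emits above; a hypothetical
# equivalent, shown only to make the epoch logging concrete.
import numpy as np

def latency_stats_sketch(latencies, label):
    arr = np.array(latencies)
    print('%s: mean %.4f, median %.4f, p95 %.4f, p99 %.4f' %
          (label, arr.mean(), np.percentile(arr, 50),
           np.percentile(arr, 95), np.percentile(arr, 99)))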