def test_exec_causal_dag_sink(self):
    '''
    Tests that the last function in a causal DAG executes correctly and
    stores the result in the KVS. Also checks to make sure that causal
    metadata is properly created.
    '''
    def func(_, x):
        return x * x
    fname = 'square'
    arg = 2

    # Create a single-function DAG and a schedule/trigger pair for it.
    dag = create_linear_dag([func], [fname], self.kvs_client, 'dag',
                            MultiKeyCausalLattice)
    schedule, triggers = self._create_fn_schedule(dag, arg, fname, [fname],
                                                  MULTI)
    schedule.output_key = 'output_key'
    schedule.client_id = '12'

    # We know that there is only one trigger. We populate dependencies
    # explicitly in this trigger message to make sure that they are
    # reflected in the final result.
    kv = triggers['BEGIN'].dependencies.add()
    kv.key = 'dependency'
    DEFAULT_VC.serialize(kv.vector_clock)

    exec_dag_function(self.pusher_cache, self.kvs_client, triggers, func,
                      schedule, self.user_library, {}, {})

    # Assert that no messages have been sent.
    self.assertEqual(len(self.socket.outbox), 0)

    # Retrieve the result and check its value and its metadata.
    result = self.kvs_client.get(schedule.output_key)[schedule.output_key]
    self.assertEqual(type(result), MultiKeyCausalLattice)

    # Check that the vector clock of the output corresponds to the client
    # ID.
    self.assertEqual(result.vector_clock,
                     VectorClock({schedule.client_id: 1}, True))

    # Check that the dependencies of the output match those specified in
    # the trigger.
    self.assertEqual(len(result.dependencies.reveal()), 1)
    self.assertTrue(kv.key in result.dependencies.reveal())
    self.assertEqual(result.dependencies.reveal()[kv.key], DEFAULT_VC)

    # Check that the output is equal to a local function execution.
    result = serializer.load_lattice(result)[0]
    self.assertEqual(result, func('', arg))
def _exec_dag_function_causal(pusher_cache, kvs, triggers, function, schedule,
                              user_lib):
    schedule = schedule[0]
    triggers = triggers[0]

    fname = schedule.target_function
    fargs = list(schedule.arguments[fname].values)

    key_version_locations = {}
    dependencies = {}

    for trigger in triggers:
        fargs += list(trigger.arguments.values)

        # Combine the locations of upstream cached key versions from all
        # triggers.
        for addr in trigger.version_locations:
            if addr in key_version_locations:
                key_version_locations[addr].extend(
                    trigger.version_locations[addr].keys)
            else:
                key_version_locations[addr] = list(
                    trigger.version_locations[addr].keys)

        # Combine the dependency sets from all triggers.
        for dependency in trigger.dependencies:
            vc = VectorClock(dict(dependency.vector_clock), True)
            key = dependency.key

            if key in dependencies:
                dependencies[key].merge(vc)
            else:
                dependencies[key] = vc

    fargs = [serializer.load(arg) for arg in fargs]

    result = _exec_func_causal(kvs, function, fargs, user_lib, schedule,
                               key_version_locations, dependencies)

    this_ref = None
    for ref in schedule.dag.functions:
        if ref.name == fname:
            this_ref = ref  # There must be a match.

    success = True
    if this_ref.type == MULTIEXEC:
        if serializer.dump(result) in this_ref.invalid_results:
            return False, False

    # Create a new trigger with the schedule ID and results of this execution.
    new_trigger = _construct_trigger(schedule.id, fname, result)

    # Serialize the key version location information into this new trigger.
    for addr in key_version_locations:
        new_trigger.version_locations[addr].keys.extend(
            key_version_locations[addr])

    # Serialize the set of dependency versions for causal metadata.
    for key in dependencies:
        dep = new_trigger.dependencies.add()
        dep.key = key
        dependencies[key].serialize(dep.vector_clock)

    is_sink = True
    for conn in schedule.dag.connections:
        if conn.source == fname:
            is_sink = False
            new_trigger.target_function = conn.sink

            dest_ip = schedule.locations[conn.sink]
            sckt = pusher_cache.get(sutils.get_dag_trigger_address(dest_ip))
            sckt.send(new_trigger.SerializeToString())

    if is_sink:
        logging.info('DAG %s (ID %s) completed in causal mode; result at %s.'
                     % (schedule.dag.name, schedule.id, schedule.output_key))

        vector_clock = {}
        okey = schedule.output_key
        if okey in dependencies:
            prev_count = 0
            if schedule.client_id in dependencies[okey]:
                prev_count = dependencies[okey][schedule.client_id]

            dependencies[okey].update(schedule.client_id, prev_count + 1)
            dependencies[okey].serialize(vector_clock)
            del dependencies[okey]
        else:
            vector_clock = {schedule.client_id: 1}

        # Serialize the result into a MultiKeyCausalLattice.
        vector_clock = VectorClock(vector_clock, True)
        result = serializer.dump(result)
        dependencies = MapLattice(dependencies)
        lattice = MultiKeyCausalLattice(vector_clock, dependencies,
                                        SetLattice({result}))

        succeed = kvs.causal_put(schedule.output_key, lattice,
                                 schedule.client_id)
        while not succeed:
            succeed = kvs.causal_put(schedule.output_key, lattice,
                                     schedule.client_id)

        # Issue requests to all upstream caches for this particular request
        # and ask them to garbage collect pinned versions stored for the
        # context of this request.
        for cache_addr in key_version_locations:
            gc_address = utils.get_cache_gc_address(cache_addr)
            sckt = pusher_cache.get(gc_address)
            sckt.send_string(schedule.client_id)

    return is_sink, [success]
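# A minimal sketch of the per-key merge assumed by the dependency-combination
# loop in _exec_dag_function_causal above. It assumes VectorClock accepts a
# plain dict of integer counters (with the True flag wrapping them, as
# elsewhere in this code) and that merge() mutates the receiver in place,
# keeping the per-client maximum, as the unassigned call to
# dependencies[key].merge(vc) implies. The client names below are
# illustrative only.
vc_a = VectorClock({'client-1': 2, 'client-2': 1}, True)
vc_b = VectorClock({'client-1': 1, 'client-3': 4}, True)

vc_a.merge(vc_b)
# vc_a should now reflect {'client-1': 2, 'client-2': 1, 'client-3': 4}.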
# For message sending via the user library.
RECV_INBOX_PORT = 5500

STATISTICS_REPORT_PORT = 7006

# Create a generic error response protobuf.
error = GenericResponse()
error.success = False

# Create a generic success response protobuf.
ok = GenericResponse()
ok.success = True
ok_resp = ok.SerializeToString()

# Create a default vector clock for keys that have no dependencies.
DEFAULT_VC = VectorClock({'base': MaxIntLattice(1)})


def get_func_kvs_name(fname):
    return FUNC_PREFIX + fname


def get_dag_trigger_address(address):
    ip, tid = address.split(':')
    return 'tcp://' + ip + ':' + str(int(tid) + DAG_EXEC_PORT)


def get_statistics_report_address(mgmt_ip):
    return 'tcp://' + mgmt_ip + ':' + str(STATISTICS_REPORT_PORT)
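# A minimal, hypothetical usage sketch of get_dag_trigger_address above. The
# 'ip:thread_id' address format matches what _exec_dag_function_causal passes
# in via schedule.locations[conn.sink]; the resulting port is the thread id
# offset by DAG_EXEC_PORT, whose concrete value is defined elsewhere in the
# codebase. The address below is illustrative only.
executor_address = '10.0.0.5:2'  # executor IP plus executor thread id
trigger_endpoint = get_dag_trigger_address(executor_address)
# trigger_endpoint == 'tcp://10.0.0.5:' + str(2 + DAG_EXEC_PORT)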
def test_exec_causal_dag_non_sink_with_ref(self):
    '''
    Creates and executes a non-sink function in a causal-mode DAG. This
    version accesses a KVS key, so we ensure that data is appropriately
    cached and the metadata is passed downstream.
    '''
    # Create two functions intended to be used in sequence.
    def incr(_, x):
        return x + 1
    iname = 'incr'

    def square(_, x):
        return x * x
    sname = 'square'

    # Put the argument into the KVS.
    arg_name = 'arg'
    arg_value = 1
    arg = serializer.dump_lattice(arg_value, MultiKeyCausalLattice)
    self.kvs_client.put(arg_name, arg)

    # Create a DAG and a trigger for the first function in the DAG.
    dag = create_linear_dag([incr, square], [iname, sname],
                            self.kvs_client, 'dag', MultiKeyCausalLattice)
    schedule, triggers = self._create_fn_schedule(
        dag, CloudburstReference(arg_name, True), iname, [iname, sname],
        MULTI)

    exec_dag_function(self.pusher_cache, self.kvs_client, triggers, incr,
                      schedule, self.user_library, {}, {})

    # Assert that exactly one message has been sent.
    self.assertEqual(len(self.pusher_cache.socket.outbox), 1)

    # Extract that message and check its contents.
    trigger = DagTrigger()
    trigger.ParseFromString(self.pusher_cache.socket.outbox[0])
    self.assertEqual(trigger.id, schedule.id)
    self.assertEqual(trigger.target_function, sname)
    self.assertEqual(trigger.source, iname)
    self.assertEqual(len(trigger.arguments.values), 1)

    # Check the metadata of the key that is cached here after execution.
    locs = trigger.version_locations
    self.assertEqual(len(locs), 1)
    self.assertTrue(self.ip in locs.keys())
    self.assertEqual(len(locs[self.ip].keys), 1)
    kv = locs[self.ip].keys[0]
    self.assertEqual(kv.key, arg_name)
    self.assertEqual(VectorClock(dict(kv.vector_clock), True),
                     arg.vector_clock)

    # Check the metadata of the causal dependency passed downstream.
    self.assertEqual(len(trigger.dependencies), 1)
    kv = trigger.dependencies[0]
    self.assertEqual(kv.key, arg_name)
    self.assertEqual(VectorClock(dict(kv.vector_clock), True),
                     arg.vector_clock)

    val = serializer.load(trigger.arguments.values[0])
    self.assertEqual(val, incr('', arg_value))
def _deserialize(self, tup):
    if tup.lattice_type == LWW:
        # Deserialize last-writer-wins lattices.
        val = LWWValue()
        val.ParseFromString(tup.payload)

        return LWWPairLattice(val.timestamp, val.value)
    elif tup.lattice_type == SET:
        # Deserialize unordered-set lattices.
        s = SetValue()
        s.ParseFromString(tup.payload)

        result = set()
        for k in s.values:
            result.add(k)

        return SetLattice(result)
    elif tup.lattice_type == ORDERED_SET:
        # Deserialize ordered-set lattices.
        res = ListBasedOrderedSet()
        val = SetValue()
        val.ParseFromString(tup.payload)
        for v in val.values:
            res.insert(v)

        return OrderedSetLattice(res)
    elif tup.lattice_type == SINGLE_CAUSAL:
        # Deserialize single-key causal lattices.
        val = SingleKeyCausalValue()
        val.ParseFromString(tup.payload)

        # Deserialize the vector_clock stored in the Protobuf into a
        # MapLattice, where each value is a MaxIntLattice of the VC
        # counter.
        vc = VectorClock(val.vector_clock, True)

        # Create a SetLattice with the value(s) stored by this lattice.
        values = set()
        for v in val.values:
            values.add(v)

        return SingleKeyCausalLattice(vc, SetLattice(values))
    elif tup.lattice_type == MULTI_CAUSAL:
        # Deserialize multi-key causal lattices.
        val = MultiKeyCausalValue()
        val.ParseFromString(tup.payload)

        # Deserialize the vector_clock stored in the Protobuf into a
        # MapLattice, where each value is a MaxIntLattice of the VC
        # counter.
        vc = VectorClock(val.vector_clock, True)

        # Deserialize the set of dependencies of this key into a
        # MapLattice where the keys are names of other KVS keys and the
        # values are MapLattices that have the vector clocks for those
        # keys.
        dep_map = {}
        for kv in val.dependencies:
            key = kv.key
            dep_map[key] = VectorClock(kv.vector_clock, True)

        # Create a SetLattice with the value(s) stored by this lattice.
        values = set()
        for v in val.values:
            values.add(v)

        dependencies = MapLattice(dep_map)
        value = SetLattice(values)

        return MultiKeyCausalLattice(vc, dependencies, value)
    else:
        raise ValueError('Unsupported type cannot be deserialized: ' +
                         str(tup.lattice_type))
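# A small, hypothetical round trip through the LWW branch of _deserialize.
# It assumes the tuple message is Anna's KeyTuple protobuf (the name is an
# assumption; only the lattice_type and payload fields are used above), that
# LWWValue exposes the timestamp and value fields read by the method, and that
# `client` is an instance of the enclosing class.
payload = LWWValue()
payload.timestamp = 5
payload.value = b'hello'

tup = KeyTuple()
tup.lattice_type = LWW
tup.payload = payload.SerializeToString()

lattice = client._deserialize(tup)
# lattice is an LWWPairLattice carrying timestamp 5 and the value b'hello'.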