def call_dag(call, requestor_cache, pusher_cache, dags, func_locations,
             key_ip_map):
    logging.info('Calling DAG %s.' % (call.name))

    dag, sources = dags[call.name]
    chosen_locations = {}

    # Pick an executor node for each function in the DAG, preferring nodes
    # that already cache the KVS references passed in as arguments.
    for f in dag.functions:
        locations = func_locations[f]
        args = call.function_args[f].args
        refs = list(filter(lambda arg: type(arg) == FluentReference,
                           map(lambda arg:
                               get_serializer(arg.type).load(arg.body),
                               args)))

        loc = _pick_node(locations, key_ip_map, refs)
        chosen_locations[f] = (loc[0], loc[1])

    schedule = DagSchedule()
    schedule.id = generate_timestamp(0)
    schedule.dag.CopyFrom(dag)

    # copy over arguments into the dag schedule
    for fname in call.function_args:
        arg_list = schedule.arguments[fname]
        arg_list.args.extend(call.function_args[fname].args)

    resp_id = str(uuid.uuid4())
    schedule.response_id = resp_id

    for func in chosen_locations:
        loc = chosen_locations[func]
        schedule.locations[func] = loc[0] + ':' + str(loc[1])

    # Send the schedule to each chosen executor and wait for it to confirm
    # that the pin succeeded before starting the DAG.
    for func in chosen_locations:
        loc = chosen_locations[func]
        ip = utils._get_queue_address(loc[0], loc[1])
        schedule.target_function = func

        sckt = requestor_cache.get(ip)
        sckt.send(schedule.SerializeToString())

        response = GenericResponse()
        response.ParseFromString(sckt.recv())

        if not response.success:
            logging.info('Pin operation for %s at %s failed.' % (func, ip))
            return response.success, response.error, None

    # Kick off execution by sending a trigger to each source function.
    for source in sources:
        trigger = DagTrigger()
        trigger.id = schedule.id
        trigger.target_function = source

        ip = sutils._get_dag_trigger_address(schedule.locations[source])
        sckt = pusher_cache.get(ip)
        sckt.send(trigger.SerializeToString())

    return True, None, resp_id
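# `_pick_node` is referenced above but defined elsewhere in the scheduler. A
# minimal sketch of the assumed heuristic follows: favor the executor thread
# caching the most referenced keys, breaking ties at random. The shape of
# `key_ip_map` ({key: [cached ips]}) and of `locations` ((ip, thread) pairs)
# are assumptions inferred from the call sites, not confirmed by this file;
# the newer `call_dag` below also passes running_counts and backoff, which
# presumably add load-awareness on top of this.
import random

def _pick_node(locations, key_ip_map, refs):
    # Score each candidate (ip, thread) pair by the number of referenced
    # keys already cached at that node's IP.
    def score(loc):
        return sum(1 for ref in refs
                   if loc[0] in key_ip_map.get(ref.key, []))

    best = max(map(score, locations))
    return random.choice([loc for loc in locations if score(loc) == best])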
def call_dag(call, pusher_cache, dags, func_locations, key_ip_map,
             running_counts, backoff):
    dag, sources = dags[call.name]

    schedule = DagSchedule()
    schedule.id = str(uuid.uuid4())
    schedule.dag.CopyFrom(dag)
    schedule.consistency = NORMAL

    if call.HasField('response_address'):
        schedule.response_address = call.response_address

    logging.info('Calling DAG %s (%s).' % (call.name, schedule.id))

    for fname in dag.functions:
        locations = func_locations[fname]
        args = call.function_args[fname].args
        refs = list(filter(lambda arg: type(arg) == FluentReference,
                           map(lambda arg:
                               get_serializer(arg.type).load(arg.body),
                               args)))

        loc = _pick_node(locations, key_ip_map, refs, running_counts, backoff)
        schedule.locations[fname] = loc[0] + ':' + str(loc[1])

        # copy over arguments into the dag schedule
        arg_list = schedule.arguments[fname]
        arg_list.args.extend(args)

    # Push the schedule to each function's executor, along with the names of
    # the upstream triggers it should wait for before executing.
    for func in schedule.locations:
        loc = schedule.locations[func].split(':')
        ip = utils._get_queue_address(loc[0], loc[1])
        schedule.target_function = func

        triggers = sutils._get_dag_predecessors(dag, func)
        if len(triggers) == 0:
            triggers.append('BEGIN')

        schedule.ClearField('triggers')
        schedule.triggers.extend(triggers)

        sckt = pusher_cache.get(ip)
        sckt.send(schedule.SerializeToString())

    # Kick off execution by sending a BEGIN trigger to each source function.
    for source in sources:
        trigger = DagTrigger()
        trigger.id = schedule.id
        trigger.source = 'BEGIN'
        trigger.target_function = source

        ip = sutils._get_dag_trigger_address(schedule.locations[source])
        sckt = pusher_cache.get(ip)
        sckt.send(trigger.SerializeToString())

    return schedule.id
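# A minimal, hypothetical call site for the newer `call_dag` above. The
# surrounding scheduler loop, the `socket` object, and the `DagCall` message
# name are illustrative guesses based on how `call`'s fields are used here,
# not code from this module.
def handle_dag_call(socket, pusher_cache, dags, func_locations, key_ip_map,
                    running_counts, backoff):
    call = DagCall()
    call.ParseFromString(socket.recv())

    if call.name not in dags:
        logging.info('Received call for unregistered DAG %s.' % call.name)
        return None

    return call_dag(call, pusher_cache, dags, func_locations, key_ip_map,
                    running_counts, backoff)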
def _exec_dag_function_normal(pusher_cache, kvs, triggers, function, schedule,
                              user_lib):
    fname = schedule.target_function
    fargs = list(schedule.arguments[fname].args)

    # Append the arguments carried by each upstream trigger.
    for trname in schedule.triggers:
        trigger = triggers[trname]
        fargs += list(trigger.arguments.args)

    logging.info('Executing function %s for DAG %s (ID %s): started at %.6f.'
                 % (fname, schedule.dag.name, trigger.id, time.time()))

    fargs = _process_args(fargs)
    result = _exec_func_normal(kvs, function, fargs, user_lib)

    is_sink = True
    for conn in schedule.dag.connections:
        if conn.source == fname:
            is_sink = False
            new_trigger = DagTrigger()
            new_trigger.id = trigger.id
            new_trigger.target_function = conn.sink
            new_trigger.source = fname

            if type(result) != tuple:
                result = (result,)

            al = new_trigger.arguments
            al.args.extend(
                list(map(lambda v: serialize_val(v, None, False), result)))

            dest_ip = schedule.locations[conn.sink]
            sckt = pusher_cache.get(sutils._get_dag_trigger_address(dest_ip))
            sckt.send(new_trigger.SerializeToString())

    logging.info('Finished executing function %s for DAG %s (ID %s): ended '
                 'at %.6f.' % (fname, schedule.dag.name, trigger.id,
                               time.time()))

    # A function with no downstream connections is a sink: send the DAG's
    # final result to the caller or store it in the KVS.
    if is_sink:
        logging.info('DAG %s (ID %s) completed; result at %s.'
                     % (schedule.dag.name, trigger.id, schedule.id))
        result = serialize_val(result)

        if schedule.HasField('response_address'):
            sckt = pusher_cache.get(schedule.response_address)
            sckt.send(result)
        else:
            l = LWWPairLattice(generate_timestamp(0), result)
            kvs.put(schedule.id, l)
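# `_process_args` is called by the executor paths above and below but defined
# elsewhere. A rough sketch of the assumed behavior, mirroring the
# deserialization pattern used in `call_dag` (each argument is a protobuf
# value with a `type` tag and a serialized `body`); the real helper may do
# more, such as resolving FluentReferences:
def _process_args(arg_list):
    return [get_serializer(arg.type).load(arg.body) for arg in arg_list]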
def exec_dag_function(pusher_cache, kvs, trigger, function, schedule):
    fname = trigger.target_function
    logging.info('Executing function %s for DAG %s (ID %d).'
                 % (fname, schedule.dag.name, trigger.id))

    fargs = list(schedule.arguments[fname].args) + \
        list(trigger.arguments.args)
    fargs = _process_args(fargs)

    result = _exec_func(kvs, function, fargs)

    is_sink = True
    for conn in schedule.dag.connections:
        if conn.source == fname:
            is_sink = False
            new_trigger = DagTrigger()
            new_trigger.id = trigger.id
            new_trigger.target_function = conn.sink

            if type(result) != tuple:
                result = (result,)

            al = new_trigger.arguments
            al.args.extend(
                list(map(lambda v: serialize_val(v, None, False), result)))

            dest_ip = schedule.locations[conn.sink]
            sckt = pusher_cache.get(sutils._get_dag_trigger_address(dest_ip))
            sckt.send(new_trigger.SerializeToString())

    # A function with no downstream connections is a sink: write the DAG's
    # final result to the KVS under the schedule's response id.
    if is_sink:
        logging.info('DAG %s (ID %d) completed; result at %s.'
                     % (schedule.dag.name, trigger.id, schedule.response_id))
        l = LWWPairLattice(generate_timestamp(0), serialize_val(result))
        kvs.put(schedule.response_id, l)
def _exec_dag_function_causal(pusher_cache, kvs, triggers, function,
                              schedule):
    fname = schedule.target_function
    fargs = list(schedule.arguments[fname].args)

    versioned_key_locations = None
    dependencies = {}

    for trname in schedule.triggers:
        trigger = triggers[trname]
        fargs += list(trigger.arguments.args)

        # combine versioned_key_locations
        if versioned_key_locations is None:
            versioned_key_locations = trigger.versioned_key_locations
        else:
            for addr in trigger.versioned_key_locations:
                versioned_key_locations[addr].versioned_keys.extend(
                    trigger.versioned_key_locations[addr].versioned_keys)

        # combine dependencies from previous func
        for dep in trigger.dependencies:
            if dep.key in dependencies:
                dependencies[dep.key] = _merge_vector_clock(
                    dependencies[dep.key], dep.vector_clock)
            else:
                dependencies[dep.key] = dep.vector_clock

    fargs = _process_args(fargs)

    kv_pairs = {}
    result = _exec_func_causal(kvs, function, fargs, kv_pairs, schedule,
                               versioned_key_locations, dependencies)

    # Fold the causal metadata of every key read during execution into the
    # dependency set carried downstream.
    for key in kv_pairs:
        if key in dependencies:
            dependencies[key] = _merge_vector_clock(dependencies[key],
                                                    kv_pairs[key][0])
        else:
            dependencies[key] = kv_pairs[key][0]

    is_sink = True
    for conn in schedule.dag.connections:
        if conn.source == fname:
            is_sink = False
            new_trigger = DagTrigger()
            new_trigger.id = trigger.id
            new_trigger.target_function = conn.sink
            new_trigger.source = fname

            if type(result) != tuple:
                result = (result,)

            al = new_trigger.arguments
            al.args.extend(
                list(map(lambda v: serialize_val(v, None, False), result)))

            for addr in versioned_key_locations:
                new_trigger.versioned_key_locations[
                    addr].versioned_keys.extend(
                    versioned_key_locations[addr].versioned_keys)

            for key in dependencies:
                dep = new_trigger.dependencies.add()
                dep.key = key
                dep.vector_clock.update(dependencies[key])

            dest_ip = schedule.locations[conn.sink]
            sckt = pusher_cache.get(sutils._get_dag_trigger_address(dest_ip))
            sckt.send(new_trigger.SerializeToString())

    if is_sink:
        logging.info('DAG %s (ID %s) completed in causal mode; result at %s.'
                     % (schedule.dag.name, schedule.id, schedule.output_key))

        # Bump this client's position in the output key's vector clock, then
        # drop the output key from the dependency set before writing.
        vector_clock = {}
        if schedule.output_key in dependencies:
            if schedule.client_id in dependencies[schedule.output_key]:
                dependencies[schedule.output_key][schedule.client_id] += 1
            else:
                dependencies[schedule.output_key][schedule.client_id] = 1
            vector_clock.update(dependencies[schedule.output_key])
            del dependencies[schedule.output_key]
        else:
            vector_clock = {schedule.client_id: 1}

        # Retry the causal put until it succeeds.
        succeed = kvs.causal_put(schedule.output_key, vector_clock,
                                 dependencies, serialize_val(result),
                                 schedule.client_id)
        while not succeed:
            succeed = kvs.causal_put(schedule.output_key, vector_clock,
                                     dependencies, serialize_val(result),
                                     schedule.client_id)

        # issue requests to GC the version store; the GC port is derived by
        # subtracting 50 from the versioned-key-location port on the same host
        for cache_addr in versioned_key_locations:
            gc_addr = cache_addr[:-4] + str(int(cache_addr[-4:]) - 50)
            sckt = pusher_cache.get(gc_addr)
            sckt.send_string(schedule.client_id)
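# `_merge_vector_clock` is used above but not defined in this file. The merge
# is assumed to be the standard pointwise maximum over {client_id: counter}
# maps; a minimal sketch under that assumption:
def _merge_vector_clock(lhs, rhs):
    # Take the elementwise maximum of two vector clocks, treating a missing
    # client id as a counter of zero.
    merged = dict(lhs)
    for client_id, counter in rhs.items():
        merged[client_id] = max(merged.get(client_id, 0), counter)
    return merged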