def trigger_statemachine(run_id: str, execution_input: dict = None, event_source: EventSource = None):
    log.info("Trigger State Machine Execution: {}".format(run_id))

    if execution_input is None:
        execution_input = {}

    uuid = uuid4()
    init_cloudevent = (CloudEvent()
                       .SetSubject('__init__')
                       .SetEventType('lambda.success')
                       .SetEventID(uuid.hex)
                       .SetSource(f'urn:{node()}:{str(uuid)}'))

    if execution_input is not None:
        init_cloudevent.SetData(execution_input)
        init_cloudevent.SetContentType('application/json')

    if event_source is None:
        credentials = get_config()['statemachines']['aws']
        event_source = SQSEventSource(name=run_id + '_' + 'SQSEventSource',
                                      access_key_id=credentials['access_key_id'],
                                      secret_access_key=credentials['secret_access_key'],
                                      region=credentials['region'],
                                      queue=run_id)

    event_source.set_stream(run_id)
    event_source.publish_cloudevent(init_cloudevent)
    log.info("Ok")
def __trigger(self, silent=False):
    event_source = list(self.dag.event_sources.values()).pop()

    uuid = uuid4()
    init_cloudevent = (CloudEvent()
                       .SetSubject('__init__')
                       .SetEventType('event.triggerflow.init')
                       .SetEventID(uuid.hex)
                       .SetSource(f'urn:{node()}:{str(uuid)}'))

    event_source.set_stream(self.dagrun_id)
    event_source.publish_cloudevent(init_cloudevent)

    self.state = DAGRun.State.RUNNING

    if not silent:
        print('DAG Run ID: {}'.format(self.dagrun_id))

    return self
def trigger_statemachine(run_id: str):
    config = get_config()
    credentials = config['statemachines']['aws']

    event_source = SQSEventSource(access_key_id=credentials['access_key_id'],
                                  secret_access_key=credentials['secret_access_key'],
                                  region=credentials['region'],
                                  queue=run_id)

    uuid = uuid4()
    init_cloudevent = (CloudEvent()
                       .SetSubject('__init__')
                       .SetEventType('lambda.success')
                       .SetEventID(uuid.hex)
                       .SetSource(f'urn:{node()}:{str(uuid)}'))

    event_source.publish_cloudevent(init_cloudevent)
def trigger(self, execution_input: dict = None):
    log.info("Trigger State Machine Execution: {}".format(self.run_id))

    if execution_input is None:
        execution_input = {}

    uuid = uuid4()
    init_cloudevent = (CloudEvent()
                       .SetSubject('__init__')
                       .SetEventType('lambda.success')
                       .SetEventID(uuid.hex)
                       .SetSource(f'urn:{node()}:{str(uuid)}'))

    if execution_input is not None:
        init_cloudevent.SetData(execution_input)
        init_cloudevent.SetContentType('application/json')

    self.event_source.publish_cloudevent(init_cloudevent)
    log.info("Ok")
def setup_triggers():
    kaf = KafkaEventSource(broker_list=['127.0.0.1:9092'], topic='ingestion')

    tf = Triggerflow()
    tf.create_workspace(workspace_name='ingestion-test', event_source=kaf)

    for i in range(200):
        uuid = uuid4()
        cloudevent = (CloudEvent()
                      .SetSubject("join{}".format(i))
                      .SetEventType('event.triggerflow.test')
                      .SetEventID(uuid.hex)
                      .SetSource(f'urn:{node()}:{str(uuid)}'))

        tf.add_trigger(event=cloudevent,
                       trigger_id="join{}".format(i),
                       condition=DefaultConditions.JOIN,
                       action=DefaultActions.TERMINATE,
                       context={'join': 1000},
                       transient=False)
def setup_triggers():
    rabbit = RabbitMQEventSource(amqp_url='amqp://*****:*****@127.0.0.1:5672',
                                 queue='ingestion')

    tf = Triggerflow()
    tf.create_workspace(workspace_name='ingestion-test', event_source=rabbit)

    for i in range(200):
        uuid = uuid4()
        cloudevent = (CloudEvent()
                      .SetSubject("join{}".format(i))
                      .SetEventType('event.triggerflow.test')
                      .SetEventID(uuid.hex)
                      .SetSource(f'urn:{node()}:{str(uuid)}'))

        tf.add_trigger(event=cloudevent,
                       trigger_id="join{}".format(i),
                       condition=DefaultConditions.JOIN,
                       action=DefaultActions.TERMINATE,
                       context={'join': 1000},
                       transient=False)
def setup_triggers():
    red = RedisEventSource(host="127.0.0.1", port=6379, password="******")
    red.set_stream(stream)

    tf = Triggerflow()
    tf.create_workspace(workspace_name='ingestion-test', event_source=red)

    for i in range(200):
        uuid = uuid4()
        cloudevent = (CloudEvent()
                      .SetSubject("join{}".format(i))
                      .SetEventType('event.triggerflow.test')
                      .SetEventID(uuid.hex)
                      .SetSource(f'urn:{node()}:{str(uuid)}'))

        tf.add_trigger(event=cloudevent,
                       trigger_id="join{}".format(i),
                       condition=DefaultConditions.JOIN,
                       action=DefaultActions.TERMINATE,
                       context={'join': 1000},
                       transient=False)
def setup_triggers():
    red = RedisEventSource(host="127.0.0.1", port=6379, password="******")

    tf = Triggerflow()
    tf.create_workspace(workspace_name='ingestion-test', event_source=red)

    for i in range(200):
        uuid = uuid4()
        cloudevent = (CloudEvent()
                      .SetSubject("join{}".format(i))
                      .SetEventType('event.triggerflow.test')
                      .SetEventID(uuid.hex)
                      .SetSource(f'urn:{node()}:{str(uuid)}'))

        tf.add_trigger(event=cloudevent,
                       trigger_id="join{}".format(i),
                       condition=DefaultConditions.SIMPLE_JOIN,
                       action=DefaultActions.TERMINATE,
                       context={'join': 1000},
                       context_parser="JOIN",
                       transient=False)
def main(args):
    task = args['task']
    task_result_keys = args['task_result_keys']
    agg_result_key = args['agg_result_key']

    os = CloudFileProxy(CloudStorage(args['cb_config']))
    open = os.open

    # Load stored client results
    task_results = []
    for k in task_result_keys:
        with open(k, 'rb') as f:
            task_results.append(pickle.loads(f.read()))

    # Aggregate
    if task == 'train':
        current_weights_key = args['current_weights_key']
        if os.path.exists(current_weights_key):
            with open(current_weights_key, 'rb') as f:
                curr_coef, curr_intercept = pickle.loads(f.read())
        else:
            curr_coef, curr_intercept = (0, 0)
        agg_result = aggregate(task_results, curr_coef, curr_intercept)

    if task == 'test':
        agg_result = sum(task_results) / len(task_results)

    # Store result
    with open(agg_result_key, 'wb') as f:
        f.write(pickle.dumps(agg_result))

    # Delete client results
    [os.remove(k) for k in task_result_keys]

    redis_source = RedisEventSource(**args['cb_config']['redis'], stream='fedlearn')
    event = CloudEvent().SetEventType('aggregation_complete').SetSubject('fedlearn.aggregator')
    redis_source.publish_cloudevent(event)

    return {'success': 1}
def setup():
    client_config = get_triggerflow_config('~/client_config.yaml')
    kafka_config = client_config['event_sources']['kafka']

    tf = TriggerflowClient(**client_config['triggerflow'], caching=True)

    kafka = KafkaEventSource(name='stress_kafka',
                             broker_list=kafka_config['broker_list'],
                             topic='stress_kafka')

    # tf.create_workspace(workspace='stress_kafka', event_source=kafka)
    tf.target_workspace(workspace='stress_kafka')

    for i in range(N_STEPS):
        for j in range(N_MAPS):
            tf.add_trigger(CloudEvent('map_{}_{}'.format(i, j)),
                           action=DefaultActions.PASS,
                           condition=DefaultConditions.SIMPLE_JOIN,
                           context={'total_activations': N_JOIN[i]})

    tf.commit_cached_triggers()
model_state = manager.Value()
state = {
    'task': 'train',
    'interval': 5,
    'round_table': [0] * ROUND_N,
    'current_weights_key': 'model_weights',
    'iter_count': 0
}
model_state.value = state

# ## Aggregation trigger

# Create the trigger activation event
client_act_event = CloudEvent().SetEventType('client_task_result').SetSubject('fedlearn.client')


# Create a custom Python callable condition
def custom_join(context, event):
    context['task_result_keys'].append(event['data']['result_key'])
    context['task'] = event['data']['task']

    if len(context['task_result_keys']) == context['join']:
        context['invoke_kwargs'] = {
            'task': context['task'],
            'cb_config': context['cb_config'],
            'task_result_keys': context['task_result_keys'],
            'current_weights_key': context['current_weights_key'],
        }

        if context['task'] == 'train':
def __create_triggers(self):
    tf = TriggerflowCachedClient()

    # Create a unique workspace for this specific DAG run and its event sources
    event_sources = list(self.dag.event_sources.values())
    # Set the current DAGRun ID as topic/queue name for the event sources
    [event_source.set_stream(self.dagrun_id) for event_source in event_sources]
    tf.create_workspace(workspace_name=self.dagrun_id,
                        event_source=event_sources.pop(),
                        global_context={})
    for event_source in event_sources:
        tf.add_event_source(event_source)

    for task in self.dag.tasks:
        context = {'subject': task.task_id,
                   'dependencies': {},
                   'operator': task.get_trigger_meta(),
                   'result': []}

        # If this task does not have upstream relatives, it will be executed when the sentinel event __init__
        # is produced; otherwise, it will be executed every time one of its upstream relatives produces its
        # termination event
        if not task.upstream_relatives:
            condition = DefaultConditions.TRUE  # Initial tasks do not have dependencies
            activation_event = CloudEvent().SetSubject('__init__').SetEventType('event.triggerflow.init')
            act_events = [activation_event]
        else:
            condition = DefaultConditions.DAG_TASK_JOIN
            act_events = []
            for upstream_relative in task.upstream_relatives:
                context['dependencies'][upstream_relative.task_id] = {'join': -1, 'counter': 0}
                activation_event = (CloudEvent()
                                    .SetSubject(upstream_relative.task_id)
                                    .SetEventType('event.triggerflow.termination.success'))
                act_events.append(activation_event)

        # Add a trigger that handles this task execution: it will be fired every time one of its upstream
        # relatives sends its termination event, but it is executed only when all dependencies are fulfilled
        tf.add_trigger(event=act_events,
                       action=DefaultActions[task.trigger_action_name],
                       condition=condition,
                       context=context,
                       trigger_id=task.task_id,
                       transient=False)

    # Join final tasks (those that do not have downstream relatives)
    context = {'subject': '__end__',
               'dependencies': {final_task.task_id: {'join': -1, 'counter': 0}
                                for final_task in self.dag.final_tasks},
               'result': []}
    activation_events = [(CloudEvent()
                          .SetSubject(final_task.task_id)
                          .SetEventType('event.triggerflow.termination.success'))
                         for final_task in self.dag.final_tasks]
    tf.add_trigger(event=activation_events,
                   action=DefaultActions.TERMINATE,
                   condition=DefaultConditions.DAG_TASK_JOIN,
                   context=context,
                   trigger_id='__end__',
                   transient=False)

    # Add error handling trigger: all tasks that produce a failure event type will fire this trigger
    # activation_event = v1.Event().SetSubject('*').SetEventType('event.triggerflow.termination.failure')
    # tf.add_trigger(event=activation_event,
    #                action=DefaultActions.DAG_TASK_FAILURE_HANDLER,
    #                condition=DefaultConditions.TRUE,
    #                context={},
    #                context_parser='DAGS',
    #                trigger_id='__error_handler__',
    #                transient=False)
    #
    # # Add retry handler trigger: we will fire this trigger manually to retry any failed task
    # activation_event = v1.Event().SetSubject('__retry__').SetEventType('event.triggerflow.termination.failure')
    # tf.add_trigger(event=activation_event,
    #                action=DefaultActions.DAG_TASK_RETRY_HANDLER,
    #                condition=DefaultConditions.TRUE,
    #                context={},
    #                context_parser='DAGS',
    #                trigger_id='__retry_handler__',
    #                transient=False)

    tf.commit_cached_triggers()

    self.state = DAGRun.State.DEPLOYED
def state_machine(states, trigger_event):
    nonlocal state_machine_count, queue_arn, lambdas_updated

    state_machine_id = 'StateMachine{}'.format(state_machine_count)
    state_machine_count += 1

    upstream_relatives = defaultdict(list)
    final_states = []
    choices = {}

    for state_name, state in states['States'].items():
        if 'End' in state and state['End']:
            final_states.append(state_name)
        elif 'Next' in state:
            upstream_relatives[state['Next']].append(state_name)
        elif state['Type'] == 'Choice':
            for choice in state['Choices']:
                upstream_relatives[choice['Next']].append(state_name)

    upstream_relatives[states['StartAt']].extend(trigger_event)

    for state_name, state in states['States'].items():
        context = {'Subject': state_name, 'State': state.copy()}
        if state_name in choices:
            context['Condition'] = choices[state_name].copy()

        if state['Type'] == 'Pass' or state['Type'] == 'Task':
            subjects = upstream_relatives[state_name]
            activation_events = [CloudEvent().SetEventType('lambda.success').SetSubject(sub)
                                 for sub in subjects]
            action = AwsAsfActions.AWS_ASF_TASK if state['Type'] == 'Task' else AwsAsfActions.AWS_ASF_PASS

            if state['Type'] == 'Task' and isinstance(self.event_source, SQSEventSource) and \
                    state['Resource'] not in lambdas_updated:
                self.__add_destination_to_lambda(state['Resource'], queue_arn)
                lambdas_updated[state['Resource']] = True

            triggerflow_client.add_trigger(event=activation_events,
                                           condition=AwsAsfConditions.AWS_ASF_CONDITION,
                                           action=action,
                                           context=context,
                                           trigger_id=state_name,
                                           transient=False)

        elif state['Type'] == 'Choice':
            choices = {}
            for choice in state['Choices']:
                upstream_relatives[choice['Next']] = upstream_relatives[state_name]
                choices[choice['Next']] = choice.copy()

        elif state['Type'] == 'Parallel':
            sub_state_machines = []
            for branch in state['Branches']:
                sub_sm_id = state_machine(branch, upstream_relatives[state_name])
                sub_state_machines.append(sub_sm_id)

            context['join_multiple'] = len(state['Branches'])
            del context['State']

            act_events = [CloudEvent().SetEventType('lambda.success').SetSubject(sub_sm)
                          for sub_sm in sub_state_machines]
            triggerflow_client.add_trigger(event=act_events,
                                           condition=AwsAsfConditions.AWS_ASF_JOIN_STATEMACHINE,
                                           action=AwsAsfActions.AWS_ASF_PASS,
                                           context=context,
                                           trigger_id=state_name,
                                           transient=False)

        elif state['Type'] == 'Wait':
            raise NotImplementedError()

        elif state['Type'] == 'Map':
            iterator = state_machine(state['Iterator'], [state_name])
            context['join_state_machine'] = iterator
            del context['State']['Iterator']

            subjects = upstream_relatives[state_name]
            activation_events = [CloudEvent().SetEventType('lambda.success').SetSubject(sub)
                                 for sub in subjects]
            triggerflow_client.add_trigger(event=activation_events,
                                           condition=AwsAsfConditions.AWS_ASF_CONDITION,
                                           action=AwsAsfActions.AWS_ASF_MAP,
                                           context=context,
                                           trigger_id=state_name,
                                           transient=False)

            if 'Next' in state:
                upstream_relatives[state['Next']].remove(state_name)
                upstream_relatives[state['Next']].append(iterator)
            if 'End' in state:
                final_states.remove(state_name)
                final_states.append(iterator)

        elif state['Type'] == 'Succeed':
            raise NotImplementedError()
        elif state['Type'] == 'Fail':
            raise NotImplementedError()

    activation_events = [CloudEvent().SetEventType('lambda.success').SetSubject(sub)
                         for sub in final_states]
    triggerflow_client.add_trigger(activation_events,
                                   condition=AwsAsfConditions.AWS_ASF_JOIN_STATEMACHINE,
                                   action=AwsAsfActions.AWS_ASF_END_STATEMACHINE,
                                   context={'Subject': state_machine_id},
                                   trigger_id=state_machine_id,
                                   transient=False)

    return state_machine_id
# Setup event triggers
EventStream(redis_source, global_context).match({
    EventPattern(subject=r'^orchestrator$', type=r'.*'):
        EventHandler(condition=PythonCallable(orchestrator_condition),
                     action=PythonCallable(orchestrator_action),
                     context={'round': 1,
                              'client_endpoint': CLIENT_FUNCTION_ENDPOINT,
                              'total_clients': TOTAL_CLIENTS,
                              'max_rounds': 3}),
    EventPattern(subject=r'^aggregator$', type=r'.*'):
        EventHandler(condition=PythonCallable(aggregator_condition),
                     action=PythonCallable(aggregator_action),
                     context={'round': 1,
                              'result_keys': [],
                              'counter': {},
                              'threshold': .65,
                              'aggregator_endpoint': AGGREGATOR_FUNCTION_ENDPOINT,
                              'total_clients': TOTAL_CLIENTS})
})

# Fire the 'orchestrator' trigger manually and start the process
round_start_event = CloudEvent().SetEventType(
    'round_start.federated_learning.triggerflow').SetSubject('orchestrator')
round_start_event.SetData({'round': 1, 'task': 'train'})
redis_source.publish_cloudevent(round_start_event)
def main(lock, model_state):
    place = None

    while place is None:
        # Attempt to participate in the training round
        with lock:
            state = model_state.value
            interval = state['interval']

            # A place will be obtained if:
            #  - there are free places to take (timestamp == 0)
            #  - some client has not completed its training within the interval
            oldest = 0
            t_now = time.time()
            for i, timestamp in enumerate(state['round_table']):
                if timestamp == -1:
                    continue
                t_elapsed = t_now - timestamp
                if t_elapsed > interval:
                    place = i
                    break
                if t_elapsed > oldest:
                    oldest = t_elapsed

            if place is not None:
                # Take this place by putting the current timestamp
                state['round_table'][place] = t_now
                model_state.value = state
                print('Acquired place:', place, '|', state['round_table'])

        if place is None:
            # Retry when the interval of the oldest client training has expired
            print('Sleeping for:', interval - oldest)
            time.sleep(interval - oldest)

    task = state['task']  # 'train' or 'test'
    n = len(state['round_table'])
    X, y = load_data(task, place, n)

    if os.path.exists(state['current_weights_key']):
        with open(state['current_weights_key'], 'rb') as f:
            coef, intercept = pickle.loads(f.read())
    else:
        coef, intercept = None, None

    if task == 'train':
        result = fit(X, y, coef, intercept)
    if task == 'test':
        result = test(X, y, coef, intercept)

    lock.acquire()
    state = model_state.value

    # If our place was not revoked
    # (could have taken too long to train)
    if state['round_table'][place] == t_now:
        # Mark as completed
        state['round_table'][place] = -1
        print('Task done, place:', place, '|', state['round_table'])

        # Store result
        result_key = get_uuid()
        with open(result_key, 'wb') as f:
            f.write(pickle.dumps(result))

        # If the round is not complete, release the lock and continue
        if state['round_table'].count(-1) != len(state['round_table']):
            model_state.value = state
            lock.release()
        # Otherwise the lock will be released when the aggregator
        # finishes and the next round starts

        # Send task complete event with the result key
        redis_source = RedisEventSource(**default_config()['redis'], stream='fedlearn')
        event = CloudEvent().SetEventType('client_task_result').SetSubject('fedlearn.client')
        event.SetData({'result_key': result_key, 'task': task})
        redis_source.publish_cloudevent(event)
        print('Result event sent')
    else:
        # If we surpassed the interval and lost our place,
        # repeat the process until we successfully contribute
        main(lock, model_state)
rabbitmq_source = RabbitMQEventSource(amqp_url='amqp://*****:*****@192.168.1.43/',
                                      queue='python_object_test')

tf_client.create_workspace(workspace_name='python_object_test',
                           event_source=rabbitmq_source)


class MyClass:
    message = 'Hello'


def my_action(context, event):
    x = context['my_class']
    context['instance'] = python_object(x())


activation_event = CloudEvent().SetEventType('test.event.type').SetSubject('Test')

tf_client.add_trigger(trigger_id='MyTrigger',
                      event=activation_event,
                      action=PythonCallable(my_action),
                      context={'my_class': python_object(MyClass)})

rabbitmq_source.publish_cloudevent(activation_event)

time.sleep(1.5)  # Let some time for the DB to be updated

trg = tf_client.get_trigger('MyTrigger')
print(trg['context']['instance'])
# data = json.loads(sys.argv[3])
subject = '__init__'
type = 'event.triggerflow.init'
data = {}

config = {'bootstrap.servers': ':9092'}


def delivery_callback(err, msg):
    if err:
        print('Failed delivery: {}'.format(err))
    else:
        print('Message delivered: {} {} {}'.format(msg.topic(), msg.partition(), msg.offset()))


kafka_producer = Producer(**config)

uuid = uuid4()
cloudevent = (CloudEvent()
              .SetSubject(subject)
              .SetEventType(type)
              .SetEventID(uuid.hex)
              .SetSource(f'urn:{node()}:{str(uuid)}'))

if data:
    print(data)
    cloudevent.SetContentType('application/json')
    cloudevent.SetData(data)

payload = cloudevent.MarshalJSON(json.dumps).read().decode('utf-8')
print(payload)

kafka_producer.produce(topic=TOPIC, value=payload, callback=delivery_callback)
kafka_producer.flush()
redis_config = {
    'host': '127.0.0.1',
    'port': 6379,
    'db': 0,
    'password': '******'
}

redis_eventsource = RedisEventSource(**redis_config)

tf = Triggerflow()
tf.create_workspace(event_source=redis_eventsource, workspace_name='timeout_test')
# tf.target_workspace('timeout_test')

# Create the timeout event. All timeout events have type 'event.triggerflow.timeout'
timeout_event = CloudEvent().SetEventType('event.triggerflow.timeout').SetSubject('timeout')


def my_action(event, context):
    print('Timeout!')


# Add a trigger that will be activated when the timeout event is received
tf.add_trigger(event=timeout_event,
               trigger_id='my_timeout_trigger',
               action=PythonCallable(my_action))

# Add a timeout. The 'timeout_event' will be published to the 'redis_eventsource' 10 seconds from now
tf.timeout(timeout_event, redis_eventsource, 10)
CLIENT_FUNCTION_ENDPOINT = 'https://us-south.functions.cloud.ibm.com/api/v1/namespaces/my_ibmcf_namespace/actions/triggerflow/fedlearn-client'
AGGREGATOR_FUNCTION_ENDPOINT = 'https://us-south.functions.cloud.ibm.com/api/v1/namespaces/my_ibmcf_namespace/actions/triggerflow/fedlearn-aggregator'
TOTAL_CLIENTS = 50

# Instantiate Triggerflow client
tf = Triggerflow()

# Create a workspace and add a Redis event source to it
redis_source = RedisEventSource(**redis_config)
tf.create_workspace(workspace_name='fedlearn',
                    event_source=redis_source,
                    global_context=global_context)

# The event that we send to manually fire the orchestrator trigger to start the first round
round_start_event = CloudEvent().SetEventType(
    'round_start.federated_learning.triggerflow').SetSubject('orchestrator')

# The event that the aggregator serverless function sends to fire the orchestrator to start the next round
aggregator_finish_event = CloudEvent().SetEventType(
    'event.triggerflow.termination.success').SetSubject('orchestrator')


def orchestrator_condition(context, event):
    if 'rounds_completed' not in context:
        context['rounds_completed'] = -1
    context['rounds_completed'] += 1
    return context['rounds_completed'] < context['max_rounds']


def orchestrator_action(context, event):
    payload = {