Example #1
def trigger_statemachine(run_id: str,
                         execution_input: dict = None,
                         event_source: EventSource = None):
    log.info("Trigger State Machine Execution: {}".format(run_id))

    if execution_input is None:
        execution_input = {}

    uuid = uuid4()
    init_cloudevent = (CloudEvent().SetSubject('__init__').SetEventType(
        'lambda.success').SetEventID(
            uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))

    if execution_input is not None:
        init_cloudevent.SetData(execution_input)
        init_cloudevent.SetContentType('application/json')

    if event_source is None:
        credentials = get_config()['statemachines']['aws']
        event_source = SQSEventSource(
            name=run_id + '_' + 'SQSEventSource',
            access_key_id=credentials['access_key_id'],
            secret_access_key=credentials['secret_access_key'],
            region=credentials['region'],
            queue=run_id)

    event_source.set_stream(run_id)
    event_source.publish_cloudevent(init_cloudevent)
    log.info("Ok")
Example #2
    def __trigger(self, silent=False):
        event_source = list(self.dag.event_sources.values()).pop()
        uuid = uuid4()
        init_cloudevent = (CloudEvent().SetSubject('__init__').SetEventType(
            'event.triggerflow.init').SetEventID(
                uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))
        event_source.set_stream(self.dagrun_id)
        event_source.publish_cloudevent(init_cloudevent)
        self.state = DAGRun.State.RUNNING
        if not silent:
            print('DAG Run ID: {}'.format(self.dagrun_id))
        return self
Example #3
def trigger_statemachine(run_id: str):
    config = get_config()
    credentials = config['statemachines']['aws']

    event_source = SQSEventSource(
        access_key_id=credentials['access_key_id'],
        secret_access_key=credentials['secret_access_key'],
        region=credentials['region'],
        queue=run_id)
    uuid = uuid4()
    init_cloudevent = (CloudEvent().SetSubject('__init__').SetEventType(
        'lambda.success').SetEventID(
            uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))
    event_source.publish_cloudevent(init_cloudevent)
Example #4
    def trigger(self, execution_input: dict = None):
        log.info("Trigger State Machine Execution: {}".format(self.run_id))

        if execution_input is None:
            execution_input = {}

        uuid = uuid4()
        init_cloudevent = (CloudEvent().SetSubject('__init__').SetEventType(
            'lambda.success').SetEventID(
                uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))

        if execution_input is not None:
            init_cloudevent.SetData(execution_input)
            init_cloudevent.SetContentType('application/json')

        self.event_source.publish_cloudevent(init_cloudevent)
        log.info("Ok")
Example #5
def setup_triggers():
    kaf = KafkaEventSource(broker_list=['127.0.0.1:9092'], topic='ingestion')
    tf = Triggerflow()
    tf.create_workspace(workspace_name='ingestion-test', event_source=kaf)

    for i in range(200):
        uuid = uuid4()
        cloudevent = (CloudEvent().SetSubject("join{}".format(i)).SetEventType(
            'event.triggerflow.test').SetEventID(
                uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))

        tf.add_trigger(event=cloudevent,
                       trigger_id="join{}".format(i),
                       condition=DefaultConditions.JOIN,
                       action=DefaultActions.TERMINATE,
                       context={'join': 1000},
                       transient=False)
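
Note: Example #5 only registers the 200 join triggers; the producer side is not shown. The sketch below is an assumption that reuses the publish_cloudevent call and CloudEvent construction from the other examples, and it presumes 'kaf' is the same KafkaEventSource created in setup_triggers (the CloudEvent and KafkaEventSource imports are omitted, as in the surrounding examples).

from platform import node
from uuid import uuid4


def publish_join_events(kaf, n_triggers=200, joins_per_trigger=1000):
    # Hypothetical producer: each trigger registered above joins on 1000 events
    # (context={'join': 1000}), so publish that many per 'joinN' subject.
    for i in range(n_triggers):
        for _ in range(joins_per_trigger):
            uuid = uuid4()
            event = (CloudEvent().SetSubject("join{}".format(i)).SetEventType(
                'event.triggerflow.test').SetEventID(
                    uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))
            kaf.publish_cloudevent(event)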
Example #6
def setup_triggers():
    rabbit = RabbitMQEventSource(amqp_url='amqp://*****:*****@127.0.0.1:5672',
                                 queue='ingestion')
    tf = Triggerflow()
    tf.create_workspace(workspace_name='ingestion-test', event_source=rabbit)

    for i in range(200):
        uuid = uuid4()
        cloudevent = (CloudEvent().SetSubject("join{}".format(i)).SetEventType(
            'event.triggerflow.test').SetEventID(
                uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))

        tf.add_trigger(event=cloudevent,
                       trigger_id="join{}".format(i),
                       condition=DefaultConditions.JOIN,
                       action=DefaultActions.TERMINATE,
                       context={'join': 1000},
                       transient=False)
Example #7
def setup_triggers():
    red = RedisEventSource(host="127.0.0.1", port=6379, password="******")
    red.set_stream('ingestion')  # stream name assumed; the original snippet referenced an undefined 'stream' variable
    tf = Triggerflow()
    tf.create_workspace(workspace_name='ingestion-test', event_source=red)

    for i in range(200):
        uuid = uuid4()
        cloudevent = (CloudEvent().SetSubject("join{}".format(i)).SetEventType(
            'event.triggerflow.test').SetEventID(
                uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))

        tf.add_trigger(event=cloudevent,
                       trigger_id="join{}".format(i),
                       condition=DefaultConditions.JOIN,
                       action=DefaultActions.TERMINATE,
                       context={'join': 1000},
                       transient=False)
Example #8
def setup_triggers():
    red = RedisEventSource(host="127.0.0.1",
                           port=6379,
                           password="******")
    tf = Triggerflow()
    tf.create_workspace(workspace_name='ingestion-test', event_source=red)

    for i in range(200):
        uuid = uuid4()
        cloudevent = (CloudEvent().SetSubject("join{}".format(i)).SetEventType(
            'event.triggerflow.test').SetEventID(
                uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))

        tf.add_trigger(event=cloudevent,
                       trigger_id="join{}".format(i),
                       condition=DefaultConditions.SIMPLE_JOIN,
                       action=DefaultActions.TERMINATE,
                       context={'join': 1000},
                       context_parser="JOIN",
                       transient=False)
Example #9
def main(args):
    task = args['task']
    task_result_keys = args['task_result_keys']
    agg_result_key = args['agg_result_key']
    # Shadow the built-in 'os' and 'open' with their cloud-storage equivalents
    os = CloudFileProxy(CloudStorage(args['cb_config']))
    open = os.open

    # Load stored client results
    task_results = []
    for k in task_result_keys:
        with open(k, 'rb') as f:
            task_results.append(pickle.loads(f.read()))

    # Aggregate
    if task == 'train':
        current_weights_key = args['current_weights_key']
        if os.path.exists(current_weights_key):
            with open(current_weights_key, 'rb') as f:
                curr_coef, curr_intercept = pickle.loads(f.read())
        else:
            curr_coef, curr_intercept = (0, 0)

        agg_result = aggregate(task_results, curr_coef, curr_intercept)

    if task == 'test':
        agg_result = sum(task_results) / len(task_results)

    # Store result
    with open(agg_result_key, 'wb') as f:
        f.write(pickle.dumps(agg_result))

    # Delete client results
    for k in task_result_keys:
        os.remove(k)

    redis_source = RedisEventSource(**args['cb_config']['redis'],
                                    stream='fedlearn')
    event = CloudEvent().SetEventType('aggregation_complete').SetSubject(
        'fedlearn.aggregator')
    redis_source.publish_cloudevent(event)

    return {'success': 1}
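
Note: the aggregator above only publishes an 'aggregation_complete' CloudEvent to the 'fedlearn' Redis stream; no consuming trigger is shown. A minimal sketch of one follows, assuming the Triggerflow / PythonCallable API used in the other examples, a hypothetical 'redis_config' dict and workspace name, with imports omitted as in the surrounding examples.

redis_source = RedisEventSource(**redis_config, stream='fedlearn')  # redis_config is assumed
tf = Triggerflow()
tf.create_workspace(workspace_name='fedlearn-aggregation', event_source=redis_source)

# Activation event matching the one published by main() above
aggregation_event = CloudEvent().SetEventType('aggregation_complete').SetSubject(
    'fedlearn.aggregator')


def on_aggregation(context, event):
    # Placeholder action: count completed aggregation rounds
    if 'rounds_aggregated' not in context:
        context['rounds_aggregated'] = 0
    context['rounds_aggregated'] += 1


tf.add_trigger(event=aggregation_event,
               trigger_id='aggregation_complete_handler',
               action=PythonCallable(on_aggregation),
               context={},
               transient=False)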
Example #10
def setup():
    client_config = get_triggerflow_config('~/client_config.yaml')
    kafka_config = client_config['event_sources']['kafka']

    tf = TriggerflowClient(**client_config['triggerflow'], caching=True)

    kafka = KafkaEventSource(name='stress_kafka',
                             broker_list=kafka_config['broker_list'],
                             topic='stress_kafka')

    # tf.create_workspace(workspace='stress_kafka', event_source=kafka)
    tf.target_workspace(workspace='stress_kafka')

    for i in range(N_STEPS):
        for j in range(N_MAPS):
            tf.add_trigger(CloudEvent('map_{}_{}'.format(i, j)),
                           action=DefaultActions.PASS,
                           condition=DefaultConditions.SIMPLE_JOIN,
                           context={'total_activations': N_JOIN[i]})

    tf.commit_cached_triggers()
Example #11
model_state = manager.Value()
state = {
    'task': 'train',
    'interval': 5,
    'round_table': [0] * ROUND_N,
    'current_weights_key': 'model_weights',
    'iter_count': 0
}
model_state.value = state

# ## Aggregation trigger


# Create the trigger activation event
client_act_event = CloudEvent().SetEventType('client_task_result').SetSubject(
    'fedlearn.client')


# Create a custom Python callable condition
def custom_join(context, event):
    context['task_result_keys'].append(event['data']['result_key'])
    context['task'] = event['data']['task']

    if len(context['task_result_keys']) == context['join']:
        context['invoke_kwargs'] = {
            'task': context['task'],
            'cb_config': context['cb_config'],
            'task_result_keys': context['task_result_keys'],
            'current_weights_key': context['current_weights_key'],
        }
        if context['task'] == 'train':
Example #12
    def __create_triggers(self):
        tf = TriggerflowCachedClient()

        # Create a unique workspace for this specific DAG run and its event sources
        event_sources = list(self.dag.event_sources.values())
        # Set current DAGRun ID as topic/queue name for the event sources
        for event_source in event_sources:
            event_source.set_stream(self.dagrun_id)
        tf.create_workspace(workspace_name=self.dagrun_id,
                            event_source=event_sources.pop(),
                            global_context={})
        for event_source in event_sources:
            tf.add_event_source(event_source)

        for task in self.dag.tasks:
            context = {
                'subject': task.task_id,
                'dependencies': {},
                'operator': task.get_trigger_meta(),
                'result': []
            }

            # If this task has no upstream relatives, it is executed when the sentinel __init__ event
            # is produced; otherwise, it is fired every time one of its upstream relatives produces its termination event
            if not task.upstream_relatives:
                condition = DefaultConditions.TRUE  # Initial tasks have no dependencies
                activation_event = CloudEvent().SetSubject(
                    '__init__').SetEventType('event.triggerflow.init')
                act_events = [activation_event]
            else:
                condition = DefaultConditions.DAG_TASK_JOIN
                act_events = []
                for upstream_relative in task.upstream_relatives:
                    context['dependencies'][upstream_relative.task_id] = {
                        'join': -1,
                        'counter': 0
                    }
                    activation_event = (CloudEvent().SetSubject(
                        upstream_relative.task_id).SetEventType(
                            'event.triggerflow.termination.success'))
                    act_events.append(activation_event)

            # Add a trigger that handles this task execution: It will be fired every time one of its upstream
            # relatives sends its termination event, but it is executed only when all dependencies are fulfilled
            tf.add_trigger(event=act_events,
                           action=DefaultActions[task.trigger_action_name],
                           condition=condition,
                           context=context,
                           trigger_id=task.task_id,
                           transient=False)

        # Join final tasks (those that do not have downstream relatives)
        context = {
            'subject': '__end__',
            'dependencies': {
                final_task.task_id: {
                    'join': -1,
                    'counter': 0
                }
                for final_task in self.dag.final_tasks
            },
            'result': []
        }
        activation_events = [
            (CloudEvent().SetSubject(final_task.task_id).SetEventType(
                'event.triggerflow.termination.success'))
            for final_task in self.dag.final_tasks
        ]
        tf.add_trigger(event=activation_events,
                       action=DefaultActions.TERMINATE,
                       condition=DefaultConditions.DAG_TASK_JOIN,
                       context=context,
                       trigger_id='__end__',
                       transient=False)

        # Add error handling trigger: All tasks that produce a failure event type will fire this trigger
        # activation_event = v1.Event().SetSubject('*').SetEventType('event.triggerflow.termination.failure')
        # tf.add_trigger(event=activation_event,
        #                action=DefaultActions.DAG_TASK_FAILURE_HANDLER,
        #                condition=DefaultConditions.TRUE,
        #                context={},
        #                context_parser='DAGS',
        #                trigger_id='__error_handler__',
        #                transient=False)
        #
        # # Add retry handler trigger: this trigger can be fired manually to retry any failed task
        # activation_event = v1.Event().SetSubject('__retry__').SetEventType('event.triggerflow.termination.failure')
        # tf.add_trigger(event=activation_event,
        #                action=DefaultActions.DAG_TASK_RETRY_HANDLER,
        #                condition=DefaultConditions.TRUE,
        #                context={},
        #                context_parser='DAGS',
        #                trigger_id='__retry_handler__',
        #                transient=False)

        tf.commit_cached_triggers()
        self.state = DAGRun.State.DEPLOYED
Example #13
        def state_machine(states, trigger_event):
            nonlocal state_machine_count, queue_arn, lambdas_updated
            state_machine_id = 'StateMachine{}'.format(state_machine_count)
            state_machine_count += 1

            upstream_relatives = defaultdict(list)
            final_states = []
            choices = {}

            for state_name, state in states['States'].items():
                if 'End' in state and state['End']:
                    final_states.append(state_name)
                elif 'Next' in state:
                    upstream_relatives[state['Next']].append(state_name)
                elif state['Type'] == 'Choice':
                    for choice in state['Choices']:
                        upstream_relatives[choice['Next']].append(state_name)

            upstream_relatives[states['StartAt']].extend(trigger_event)

            for state_name, state in states['States'].items():
                context = {'Subject': state_name, 'State': state.copy()}

                if state_name in choices:
                    context['Condition'] = choices[state_name].copy()

                if state['Type'] == 'Pass' or state['Type'] == 'Task':

                    subjects = upstream_relatives[state_name]
                    activation_events = [
                        CloudEvent().SetEventType('lambda.success').SetSubject(
                            sub) for sub in subjects
                    ]

                    action = AwsAsfActions.AWS_ASF_TASK if state[
                        'Type'] == 'Task' else AwsAsfActions.AWS_ASF_PASS

                    if state['Type'] == 'Task' and isinstance(self.event_source, SQSEventSource) and \
                            state['Resource'] not in lambdas_updated:
                        self.__add_destination_to_lambda(
                            state['Resource'], queue_arn)
                        lambdas_updated[state['Resource']] = True

                    triggerflow_client.add_trigger(
                        event=activation_events,
                        condition=AwsAsfConditions.AWS_ASF_CONDITION,
                        action=action,
                        context=context,
                        trigger_id=state_name,
                        transient=False)

                elif state['Type'] == 'Choice':
                    choices = {}
                    for choice in state['Choices']:
                        upstream_relatives[
                            choice['Next']] = upstream_relatives[state_name]
                        choices[choice['Next']] = choice.copy()

                elif state['Type'] == 'Parallel':
                    sub_state_machines = []

                    for branch in state['Branches']:
                        sub_sm_id = state_machine(
                            branch, upstream_relatives[state_name])
                        sub_state_machines.append(sub_sm_id)

                    context['join_multiple'] = len(state['Branches'])
                    del context['State']

                    act_events = [
                        CloudEvent().SetEventType('lambda.success').SetSubject(
                            sub_sm) for sub_sm in sub_state_machines
                    ]

                    triggerflow_client.add_trigger(
                        event=act_events,
                        condition=AwsAsfConditions.AWS_ASF_JOIN_STATEMACHINE,
                        action=AwsAsfActions.AWS_ASF_PASS,
                        context=context,
                        trigger_id=state_name,
                        transient=False)

                elif state['Type'] == 'Wait':
                    raise NotImplementedError()

                elif state['Type'] == 'Map':
                    iterator = state_machine(state['Iterator'], [state_name])
                    context['join_state_machine'] = iterator
                    del context['State']['Iterator']

                    subjects = upstream_relatives[state_name]
                    activation_events = [
                        CloudEvent().SetEventType('lambda.success').SetSubject(
                            sub) for sub in subjects
                    ]

                    triggerflow_client.add_trigger(
                        event=activation_events,
                        condition=AwsAsfConditions.AWS_ASF_CONDITION,
                        action=AwsAsfActions.AWS_ASF_MAP,
                        context=context,
                        trigger_id=state_name,
                        transient=False)
                    if 'Next' in state:
                        upstream_relatives[state['Next']].remove(state_name)
                        upstream_relatives[state['Next']].append(iterator)
                    if 'End' in state:
                        final_states.remove(state_name)
                        final_states.append(iterator)

                elif state['Type'] == 'Succeed':
                    raise NotImplementedError()
                elif state['Type'] == 'Fail':
                    raise NotImplementedError()

            activation_events = [
                CloudEvent().SetEventType('lambda.success').SetSubject(sub)
                for sub in final_states
            ]
            triggerflow_client.add_trigger(
                activation_events,
                condition=AwsAsfConditions.AWS_ASF_JOIN_STATEMACHINE,
                action=AwsAsfActions.AWS_ASF_END_STATEMACHINE,
                context={'Subject': state_machine_id},
                trigger_id=state_machine_id,
                transient=False)

            return state_machine_id
Example #14
# Setup event triggers
EventStream(redis_source, global_context).match({
    EventPattern(subject=r'^orchestrator$', type=r'.*'):
    EventHandler(condition=PythonCallable(orchestrator_condition),
                 action=PythonCallable(orchestrator_action),
                 context={
                     'round': 1,
                     'client_endpoint': CLIENT_FUNCTION_ENDPOINT,
                     'total_clients': TOTAL_CLIENTS,
                     'max_rounds': 3
                 }),
    EventPattern(subject=r'^aggregator$', type=r'.*'):
    EventHandler(condition=PythonCallable(aggregator_condition),
                 action=PythonCallable(aggregator_action),
                 context={
                     'round': 1,
                     'result_keys': [],
                     'counter': {},
                     'threshold': .65,
                     'aggregator_endpoint': AGGREGATOR_FUNCTION_ENDPOINT,
                     'total_clients': TOTAL_CLIENTS
                 })
})

# Fire 'orchestrator' trigger manually and start the process
round_start_event = CloudEvent().SetEventType(
    'round_start.federated_learning.triggerflow').SetSubject('orchestrator')
round_start_event.SetData({'round': 1, 'task': 'train'})
redis_source.publish_cloudevent(round_start_event)
Example #15
def main(lock, model_state):
    place = None
    while place is None:
        # Attempt to participate in the training round
        with lock:
            state = model_state.value
            interval = state['interval']

            # A place will be obtained if:
            #   - there are free places to take (timestamp == 0)
            #   - some client has not completed its training within the interval
            oldest = 0
            t_now = time.time()
            for i, timestamp in enumerate(state['round_table']):
                if timestamp == -1:
                    continue

                t_elapsed = t_now - timestamp
                if t_elapsed > interval:
                    place = i
                    break

                if t_elapsed > oldest:
                    oldest = t_elapsed

            if place is not None:
                # Take this place by putting the current timestamp
                state['round_table'][place] = t_now
                model_state.value = state
                print('Acquired place:', place, '|', state['round_table'])

        if place is None:
            # Retry when the interval of the oldest client training has expired
            print('Sleeping for:', interval - oldest)
            time.sleep(interval - oldest)

    task = state['task']    # 'train' or 'test'
    n = len(state['round_table'])
    X, y = load_data(task, place, n)

    if os.path.exists(state['current_weights_key']):
        with open(state['current_weights_key'], 'rb') as f:
            coef, intercept = pickle.loads(f.read())
    else:
        coef, intercept = None, None
    
    if task == 'train':
        result = fit(X, y, coef, intercept)

    if task == 'test':
        result = test(X, y, coef, intercept)

    lock.acquire()
    state = model_state.value
    # If our place was not revoked
    # (could have taken too long to train)
    if state['round_table'][place] == t_now:
        # Mark as completed
        state['round_table'][place] = -1
        print('Task done, place:', place, '|', state['round_table'])

        # Store result
        result_key = get_uuid()
        with open(result_key, 'wb') as f:
            f.write(pickle.dumps(result))
        
        # If the round is not complete, release the lock and continue
        if state['round_table'].count(-1) != len(state['round_table']):
            model_state.value = state
            lock.release()
        # Otherwise the lock will be released when the aggregator
        # finishes and the next round starts

        # Send task complete event with the result key
        redis_source = RedisEventSource(**default_config()['redis'], stream='fedlearn')
        event = CloudEvent().SetEventType('client_task_result').SetSubject('fedlearn.client')
        event.SetData({
            'result_key': result_key,
            'task': task
        })
        redis_source.publish_cloudevent(event)
        print('Result event sent')

    else:
        # If we exceeded the interval and lost our place,
        # repeat the process until we successfully contribute
        main(lock, model_state)
Example #16
rabbitmq_source = RabbitMQEventSource(
    amqp_url='amqp://*****:*****@192.168.1.43/', queue='python_object_test')

tf_client.create_workspace(workspace_name='python_object_test',
                           event_source=rabbitmq_source)


class MyClass:
    message = 'Hello'


def my_action(context, event):
    x = context['my_class']
    context['instance'] = python_object(x())


activation_event = CloudEvent().SetEventType('test.event.type').SetSubject(
    'Test')

tf_client.add_trigger(trigger_id='MyTrigger',
                      event=activation_event,
                      action=PythonCallable(my_action),
                      context={'my_class': python_object(MyClass)})

rabbitmq_source.publish_cloudevent(activation_event)

time.sleep(1.5)  # Let some time for the DB to be updated

trg = tf_client.get_trigger('MyTrigger')
print(trg['context']['instance'])
Example #17
    # data = json.loads(sys.argv[3])
    subject = '__init__'
    type = 'event.triggerflow.init'
    data = {}

    config = {'bootstrap.servers': ':9092'}

    def delivery_callback(err, msg):
        if err:
            print('Failed delivery: {}'.format(err))
        else:
            print('Message delivered: {} {} {}'.format(msg.topic(),
                                                       msg.partition(),
                                                       msg.offset()))

    kafka_producer = Producer(**config)
    uuid = uuid4()
    cloudevent = (
        CloudEvent().SetSubject(subject).SetEventType(type).SetEventID(
            uuid.hex).SetSource(f'urn:{node()}:{str(uuid)}'))
    if data:
        print(data)
        cloudevent.SetContentType('application/json')
        cloudevent.SetData(data)
    payload = cloudevent.MarshalJSON(json.dumps).read().decode('utf-8')
    print(payload)
    kafka_producer.produce(topic=TOPIC,
                           value=payload,
                           callback=delivery_callback)
    kafka_producer.flush()
Example #18
redis_config = {
    'host': '127.0.0.1',
    'port': 6379,
    'db': 0,
    'password': '******'
}

redis_eventsource = RedisEventSource(**redis_config)

tf = Triggerflow()
tf.create_workspace(event_source=redis_eventsource,
                    workspace_name='timeout_test')
# tf.target_workspace('timeout_test')

# Create the timeout event. All timeout events have type 'event.triggerflow.timeout'
timeout_event = CloudEvent().SetEventType(
    'event.triggerflow.timeout').SetSubject('timeout')


def my_action(event, context):
    print('Timeout!')


# Add a trigger that will be activated when the timeout event is received
tf.add_trigger(event=timeout_event,
               trigger_id='my_timeout_trigger',
               action=PythonCallable(my_action))

# Add a timeout. The 'timeout_event' will be published to the 'redis_eventsource' after 10 seconds from now
tf.timeout(timeout_event, redis_eventsource, 10)
Example #19
CLIENT_FUNCTION_ENDPOINT = 'https://us-south.functions.cloud.ibm.com/api/v1/namespaces/my_ibmcf_namespace/actions/triggerflow/fedlearn-client'
AGGREGATOR_FUNCTION_ENDPOINT = 'https://us-south.functions.cloud.ibm.com/api/v1/namespaces/my_ibmcf_namespace/actions/triggerflow/fedlearn-aggregator'
TOTAL_CLIENTS = 50

# Instantiate Triggerflow client
tf = Triggerflow()

# Create a workspace and add a Redis event source to it
redis_source = RedisEventSource(**redis_config)
tf.create_workspace(workspace_name='fedlearn',
                    event_source=redis_source,
                    global_context=global_context)

# The event that we send to manually fire the orchestrator trigger to start the first round
round_start_event = CloudEvent().SetEventType(
    'round_start.federated_learning.triggerflow').SetSubject('orchestrator')

# The event that the aggregator serverless function sends to fire the orchestrator to start the next round
aggregator_finish_event = CloudEvent().SetEventType(
    'event.triggerflow.termination.success').SetSubject('orchestrator')


def orchestrator_condition(context, event):
    if 'rounds_completed' not in context:
        context['rounds_completed'] = -1
    context['rounds_completed'] += 1
    return context['rounds_completed'] < context['max_rounds']


def orchestrator_action(context, event):
    payload = {