# In[ ]:

from concurrent.futures import ThreadPoolExecutor
import pickle
import time

# Pre-fetch the dataset locally so the simulated clients find it cached
# instead of downloading it concurrently
from sklearn.datasets import fetch_20newsgroups_vectorized
fetch_20newsgroups_vectorized()
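
# The clients in this example rely on a load_data(task, part, n_parts)
# helper that is defined elsewhere in the notebook. A hypothetical sketch
# of what it could look like, assuming it splits the vectorized
# 20newsgroups data into n_parts per-client partitions (the subset mapping
# and the interleaved split are assumptions, not the notebook's actual
# implementation):
import numpy as np

def load_data(task, part, n_parts):
    # Train on the 'train' subset, evaluate on the 'test' subset
    subset = 'train' if task == 'train' else 'test'
    data = fetch_20newsgroups_vectorized(subset=subset)
    X, y = data.data, data.target
    # Give each client an interleaved slice of the rows
    idx = np.arange(part, X.shape[0], n_parts)
    return X[idx], y[idx]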

# Launch simulated clients
NUM_CLIENTS = 10
# Each simulated client runs client(lock, model_state) in its own thread;
# leaving the with-block waits for all of them to finish
with ThreadPoolExecutor() as executor:
    executor.map(lambda x: client(*x), [[lock, model_state]] * NUM_CLIENTS)

# Run a test round
change_task_event = CloudEvent().SetEventType('change_task').SetSubject(
    'fedlearn.aggregator')
change_task_event.SetData({'task': 'test'})
redis_source.publish_cloudevent(change_task_event)

with ThreadPoolExecutor() as executor:
    executor.map(lambda x: client(*x), [[lock, model_state]] * NUM_CLIENTS)

while not cloudfs.path.exists('model_score'):
    time.sleep(0.5)
with cloudfs.open('model_score', 'rb') as f:
    score = pickle.loads(f.read())

print('Done!\n\n')
print('Training iterations:', model_state.value['iter_count'])
print('Model score:', score)

# Note: in this example, clients store their results in Redis, which acts
# as the cloud storage backend in addition to caching the shared state
# (model_state) and the synchronization utilities (lock). We do this to
# avoid dealing with extra credentials; in a typical deployment we would
# use a serverless object store (AWS S3, IBM COS, GCP Storage), where
# large numbers of results can be stored and accessed at scale. More on
# how to configure Cloudbutton's storage backends
# [here](https://github.com/cloudbutton/cloudbutton/tree/master/config).
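
# A minimal sketch of that storage round-trip using the plain redis-py
# client directly (the host, port and key name here are hypothetical; the
# notebook's cloudfs wrapper hides these details behind a file-like API):
import pickle
import redis

r = redis.Redis(host='localhost', port=6379)

# A client pickles its result and stores it under a key...
r.set('example_result_key', pickle.dumps({'score': 0.87}))

# ...and the coordinator polls for that key and unpickles it, which is
# what the cloudfs.path.exists / cloudfs.open loop above amounts to
result = pickle.loads(r.get('example_result_key'))
print(result)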
# Set up the event triggers
EventStream(redis_source, global_context).match({
    EventPattern(subject=r'^orchestrator$', type=r'.*'):
    EventHandler(condition=PythonCallable(orchestrator_condition),
                 action=PythonCallable(orchestrator_action),
                 context={
                     'round': 1,
                     'client_endpoint': CLIENT_FUNCTION_ENDPOINT,
                     'total_clients': TOTAL_CLIENTS,
                     'max_rounds': 3
                 }),
    EventPattern(subject=r'^aggregator$', type=r'.*'):
    EventHandler(condition=PythonCallable(aggregator_condition),
                 action=PythonCallable(aggregator_action),
                 context={
                     'round': 1,
                     'result_keys': [],
                     'counter': {},
                     'threshold': .65,
                     'aggregator_endpoint': AGGREGATOR_FUNCTION_ENDPOINT,
                     'total_clients': TOTAL_CLIENTS
                 })
})

# Fire the 'orchestrator' trigger manually and start the process
round_start_event = CloudEvent().SetEventType(
    'round_start.federated_learning.triggerflow').SetSubject('orchestrator')
round_start_event.SetData({'round': 1, 'task': 'train'})
redis_source.publish_cloudevent(round_start_event)
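
# For reference, a condition/action pair like those registered above could
# look roughly as follows. This is a hypothetical sketch: the callable
# signature (assumed here to be f(context, event)), the event layout and
# the HTTP invocation are assumptions, not code from this notebook.
import requests

def example_aggregator_condition(context, event):
    # Collect result keys until a threshold fraction of the total
    # clients has reported for the current round
    context['result_keys'].append(event['data']['result_key'])
    needed = context['threshold'] * context['total_clients']
    return len(context['result_keys']) >= needed

def example_aggregator_action(context, event):
    # POST the collected result keys to the aggregator function endpoint
    # (assuming it is HTTP-invokable), then reset the per-round state
    requests.post(context['aggregator_endpoint'],
                  json={'round': context['round'],
                        'result_keys': context['result_keys']})
    context['round'] += 1
    context['result_keys'] = []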
# Example #3
import os
import pickle
import time

# load_data, fit, test, get_uuid, RedisEventSource, default_config and
# CloudEvent are defined in other parts of the notebook

def main(lock, model_state):
    # Simulated client: take a place in the current round, run the
    # train/test task on its data partition, and publish the result
    place = None
    while place is None:
        # Attempt to participate in the training round
        with lock:
            state = model_state.value
            interval = state['interval']

            # A place is obtained if:
            #   - it is free (timestamp == 0), or
            #   - its previous client has not finished within the interval
            # (a timestamp of -1 marks a place already completed this round)
            oldest = 0
            t_now = time.time()
            for i, timestamp in enumerate(state['round_table']):
                if timestamp == -1:
                    continue

                t_elapsed = t_now - timestamp
                if t_elapsed > interval:
                    place = i
                    break

                if t_elapsed > oldest:
                    oldest = t_elapsed

            if place is not None:
                # Take this place by putting the current timestamp
                state['round_table'][place] = t_now
                model_state.value = state
                print('Acquired place:', place, '|', state['round_table'])

        if place is None:
            # Retry when the interval of the oldest client training has expired
            print('Sleeping for:', interval - oldest)
            time.sleep(interval - oldest)

    task = state['task']    # 'train' or 'test'
    n = len(state['round_table'])
    X, y = load_data(task, place, n)

    if os.path.exists(state['current_weights_key']):
        with open(state['current_weights_key'], 'rb') as f:
            coef, intercept = pickle.loads(f.read())
    else:
        coef, intercept = None, None
    
    if task == 'train':
        result = fit(X, y, coef, intercept)
    elif task == 'test':
        result = test(X, y, coef, intercept)

    lock.acquire()
    state = model_state.value
    # If our place was not revoked
    # (could have taken too long to train)
    if state['round_table'][place] == t_now:
        # Mark as completed
        state['round_table'][place] = -1
        print('Task done, place:', place, '|', state['round_table'])

        # Store result
        result_key = get_uuid()
        with open(result_key, 'wb') as f:
            f.write(pickle.dumps(result))
        
        # If the round is not complete, release the lock and continue
        if state['round_table'].count(-1) != len(state['round_table']):
            model_state.value = state
            lock.release()
        # Otherwise the lock will be released when the aggregator
        # finishes and the next round starts

        # Send task complete event with the result key
        redis_source = RedisEventSource(**default_config()['redis'], stream='fedlearn')
        event = CloudEvent().SetEventType('client_task_result').SetSubject('fedlearn.client')
        event.SetData({
            'result_key': result_key,
            'task': task
        })
        redis_source.publish_cloudevent(event)
        print('Result event sent')

    else:
        # We surpassed the interval and lost our place: release the lock
        # and repeat the process until we successfully contribute
        lock.release()
        main(lock, model_state)
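
# To make the round_table protocol concrete, here is a small standalone
# illustration with hypothetical values (no Redis or lock involved):
# 0 marks a free place, -1 a client that already finished this round, and
# any other value is the timestamp at which a client claimed its place.
import time

interval = 60.0
t_now = time.time()
round_table = [0, -1, t_now - 10, t_now - 120]

for i, timestamp in enumerate(round_table):
    if timestamp == -1:
        continue                              # place 1: already completed
    if t_now - timestamp > interval:
        print('place', i, 'is claimable')     # places 0 and 3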