# In[ ]:
# Pre-fetch dataset locally so simulated clients don't all download it at once
from sklearn.datasets import fetch_20newsgroups_vectorized
fetch_20newsgroups_vectorized()

# Launch simulated clients.
# FIX: the original used executor.map(...) and never iterated the returned
# results, so any exception raised inside a client was silently swallowed.
# submit() + result() makes client failures propagate to this driver.
NUM_CLIENTS = 10
with ThreadPoolExecutor() as executor:
    futures = [executor.submit(client, lock, model_state)
               for _ in range(NUM_CLIENTS)]
    for future in futures:
        future.result()  # re-raises any exception from the client thread

# Run a test round: tell the aggregator to switch the shared task to 'test',
# then run the clients again.
change_task_event = CloudEvent().SetEventType('change_task').SetSubject(
    'fedlearn.aggregator')
change_task_event.SetData({'task': 'test'})
redis_source.publish_cloudevent(change_task_event)

with ThreadPoolExecutor() as executor:
    futures = [executor.submit(client, lock, model_state)
               for _ in range(NUM_CLIENTS)]
    for future in futures:
        future.result()

# Poll the cloud storage until the test round has stored the final score
while not cloudfs.path.exists('model_score'):
    time.sleep(0.5)
with cloudfs.open('model_score', 'rb') as f:
    score = pickle.loads(f.read())

print('Done!\n\n')
print('Training iterations:', model_state.value['iter_count'])
print('Model score:', score)

# Note: in this example clients store their results into Redis that serves as
# a cloud storage backend apart from serving as a cache for the shared state
# (model_state) and the synchronization utilities (lock). We do this to avoid
# fiddling with more credentials, since in a common use case we would use a
# serverless object storage (AWS S3, IBM COS, GCP Storage) where loads of
# results could be stored and accessed massively. More into how to configure
# Cloudbutton's storage backends
# [here](https://github.com/cloudbutton/cloudbutton/tree/master/config).
# Register the event triggers for the orchestrator and the aggregator,
# then kick off the first round by hand.
trigger_map = {
    EventPattern(subject=r'^orchestrator$', type=r'.*'):
        EventHandler(
            condition=PythonCallable(orchestrator_condition),
            action=PythonCallable(orchestrator_action),
            context={
                'round': 1,
                'client_endpoint': CLIENT_FUNCTION_ENDPOINT,
                'total_clients': TOTAL_CLIENTS,
                'max_rounds': 3,
            },
        ),
    EventPattern(subject=r'^aggregator$', type=r'.*'):
        EventHandler(
            condition=PythonCallable(aggregator_condition),
            action=PythonCallable(aggregator_action),
            context={
                'round': 1,
                'result_keys': [],
                'counter': {},
                'threshold': .65,
                'aggregator_endpoint': AGGREGATOR_FUNCTION_ENDPOINT,
                'total_clients': TOTAL_CLIENTS,
            },
        ),
}
EventStream(redis_source, global_context).match(trigger_map)

# Fire the 'orchestrator' trigger manually to start the process
round_start_event = (CloudEvent()
                     .SetEventType('round_start.federated_learning.triggerflow')
                     .SetSubject('orchestrator'))
round_start_event.SetData({'round': 1, 'task': 'train'})
redis_source.publish_cloudevent(round_start_event)
def main(lock, model_state):
    """Run one federated-learning client until it contributes to a round.

    The client claims a free place in the shared round table, trains or
    tests on its data shard, and publishes the result key as a CloudEvent.
    If its place is revoked (training took longer than the round interval),
    it retries from scratch.

    Parameters:
        lock: shared, NON-reentrant lock guarding ``model_state``.
        model_state: shared proxy whose ``.value`` is the round-state dict
            ('interval', 'round_table', 'task', 'current_weights_key', ...).

    Note: when this client completes the LAST place of a round, the lock is
    deliberately left held — the aggregator releases it when the next round
    starts.

    FIX: the original recursed into ``main()`` on the revoked-place path
    while still holding the lock, which deadlocks on the non-reentrant lock
    at the next ``with lock:`` (and grows the stack unboundedly). The retry
    is now a loop that releases the lock first.
    """
    while True:
        # ---- Attempt to acquire a place in the training round ----------
        place = None
        while place is None:
            with lock:
                state = model_state.value
                interval = state['interval']
                # A place can be taken if:
                #  - it is free (timestamp == 0, so its elapsed time
                #    trivially exceeds the interval), or
                #  - its client did not finish within the interval.
                oldest = 0
                t_now = time.time()
                for i, timestamp in enumerate(state['round_table']):
                    if timestamp == -1:  # place already completed this round
                        continue
                    t_elapsed = t_now - timestamp
                    if t_elapsed > interval:
                        place = i
                        break
                    if t_elapsed > oldest:
                        oldest = t_elapsed
                if place is not None:
                    # Claim the place by stamping the current timestamp
                    state['round_table'][place] = t_now
                    model_state.value = state
                    print('Acquired place:', place, '|', state['round_table'])
            if place is None:
                # Retry when the interval of the oldest in-flight training
                # has expired (oldest <= interval here, so this is >= 0)
                print('Sleeping for:', interval - oldest)
                time.sleep(interval - oldest)

        # ---- Do the actual work, outside the lock ----------------------
        task = state['task']  # 'train' or 'test'
        n = len(state['round_table'])
        X, y = load_data(task, place, n)

        # Load the current global weights if a previous round stored them
        if os.path.exists(state['current_weights_key']):
            with open(state['current_weights_key'], 'rb') as f:
                coef, intercept = pickle.loads(f.read())
        else:
            coef, intercept = None, None

        if task == 'train':
            result = fit(X, y, coef, intercept)
        if task == 'test':
            result = test(X, y, coef, intercept)

        # ---- Commit the result under the lock --------------------------
        lock.acquire()
        state = model_state.value
        if state['round_table'][place] != t_now:
            # We exceeded the interval and lost our place: release the
            # lock and repeat the whole process until we successfully
            # contribute.
            lock.release()
            continue

        # Mark our place as completed
        state['round_table'][place] = -1
        print('Task done, place:', place, '|', state['round_table'])

        # Store result
        result_key = get_uuid()
        with open(result_key, 'wb') as f:
            f.write(pickle.dumps(result))

        # If the round is not complete, release the lock and continue.
        # Otherwise the lock stays held and is released when the
        # aggregator finishes and the next round starts.
        if state['round_table'].count(-1) != len(state['round_table']):
            model_state.value = state
            lock.release()

        # Send task-complete event carrying the result key
        redis_source = RedisEventSource(**default_config()['redis'],
                                        stream='fedlearn')
        event = (CloudEvent()
                 .SetEventType('client_task_result')
                 .SetSubject('fedlearn.client'))
        event.SetData({'result_key': result_key, 'task': task})
        redis_source.publish_cloudevent(event)
        print('Result event sent')
        return