def read_input(input_dir: str):
    """
    Read the configured input file from 'input_dir' into a DataFrame.

    The file name is taken from the redis key 'input_filename' and the
    missing-value strategy from the redis key 'missing_data'. Files ending in
    ".csv" are parsed comma-separated, files ending in ".tsv" tab-separated.
    With 'missing_data' set to "mean" or "median" missing values are filled
    with the column mean/median of the numeric columns, with "drop" rows
    containing missing values are removed; any other value leaves the data
    untouched.

    :param input_dir: The input directory containing the file.
    :return: The parsed DataFrame, or None if the file has an unsupported
        extension or could not be read
    """
    filename = redis_get('input_filename')
    missing_data = redis_get('missing_data')
    try:
        current_app.logger.info('[API] Parsing data of ' + input_dir)
        current_app.logger.info('[API] ' + filename)

        if filename.endswith(".csv"):
            sep = ','
        elif filename.endswith(".tsv"):
            sep = '\t'
        else:
            # Without this guard the code below would fail on a None frame.
            current_app.logger.info(
                '[API] could not read files: unsupported file extension of '
                + filename)
            return None
        data = pd.read_csv(input_dir + '/' + filename, sep=sep)

        # numeric_only=True: mean/median are undefined for text columns and
        # raise a TypeError on mixed-type frames otherwise.
        if missing_data == "mean":
            data.fillna(data.mean(numeric_only=True), inplace=True)
        elif missing_data == "median":
            data.fillna(data.median(numeric_only=True), inplace=True)
        elif missing_data == "drop":
            data.dropna(inplace=True)

        current_app.logger.info('[API] ' + str(data))
        return data
    except Exception as e:
        # Deliberately best-effort: callers receive None when reading fails.
        # The placeholder is required, otherwise logging itself fails to
        # format the record and the actual error is lost.
        current_app.logger.exception('[API] could not read files: %s', e)
        return None
def root():
    """
    Decide which page content is shown to the user for the current step.

    Most steps map to a fixed status text. The 'waiting' and
    'broadcast_results' steps show a different text for the coordinator and
    for the clients, and the 'finished' step renders 'start_client.html' with
    the stored scores and predictions.

    :return: HTML content
    """
    step = get_step()
    log = current_app.logger

    # Steps whose page does not depend on the role: step -> (log line, page)
    static_pages = {
        'start': ('[WEB] Initializing', 'Initializing'),
        'setup': ('[WEB] Setup', 'Setup'),
        'local_calculation': ('[WEB] Perform local boosting',
                              'Perform local boosting...'),
        'global_calculation': ('[WEB] Combine models', 'Combine models...'),
        'test_results': ('[WEB] Test results', 'Test results....'),
        # status() passes through 'write_results'; without an entry the user
        # would be told that something went wrong during a normal run.
        'write_results': ('[WEB] Write Results', 'Write results...'),
        'finalize': ('[WEB] Finalize', 'Finalize...'),
    }
    if step in static_pages:
        log_line, page = static_pages[step]
        log.info(log_line)
        return page

    if step == 'waiting':
        if redis_get('is_coordinator'):
            log.info('[WEB] Waiting for client model...')
            return 'Waiting for client model...'
        log.info('[WEB] Send local results to coordinator')
        return 'Send local results to coordinator'

    if step == 'broadcast_results':
        # The role flag is stored under 'is_coordinator' (as used by the
        # 'waiting' branch above); the key 'coordinator' is never read
        # anywhere else.
        if not redis_get('is_coordinator'):
            log.info('[WEB] Receiving global model from coordinator')
            return 'Receiving global model from coordinator...'
        log.info('[WEB] Broadcasting global model to other clients')
        return 'Broadcasting global model to other clients...'

    if step == 'finished':
        log.info('[WEB] Finished')
        return render_template('start_client.html',
                               score1=redis_get('score_single'),
                               score2=redis_get('score_combined'),
                               pred=redis_get('predictions'))

    return 'Something went wrong.'
def get_connections(source, destination, departure_date):
    """
    Look up the connections of a journey through redis_get.

    The lookup key is built by journey_key() from the three arguments, which
    are also handed on as keyword arguments for fetch_connections.

    :param source: start of the journey
    :param destination: end of the journey
    :param departure_date: date of the journey
    :return: whatever redis_get returns for the journey key
    """
    # NOTE(review): 60 * 60 is presumably an expiry of one hour in seconds
    # and fetch_connections the loader used on a miss - confirm against
    # redis_get.
    fetch_arguments = dict(
        source=source,
        destination=destination,
        departure_date=departure_date,
    )
    return redis_get(
        journey_key(source, destination, departure_date),
        60 * 60,
        fetch_connections,
        fetch_arguments,
    )
def calculate_average():
    """
    Score the global model on the local test set.

    Decodes the model stored under 'global_model' and the test split stored
    under 'test_set', predicts the test samples and scores the predictions
    with the configured metric: a metric name containing "acc" selects
    accuracy, "matth" the Matthews correlation coefficient, "roc" or "auc"
    the ROC AUC; anything else (including an unset metric) falls back to
    accuracy. Predictions and score are stored under 'predictions' and
    'score_combined'.

    :return: the score of the global model on the local test set
    """
    # NOTE(review): jsonpickle.decode can instantiate arbitrary objects; the
    # global model is received from another participant, so it must only be
    # decoded when that participant is trusted.
    global_model = jsonpickle.decode(redis_get('global_model'))
    client_id = redis_get('id')
    test_set = jsonpickle.decode(redis_get("test_set"))
    x_test, y_test = test_set[0], test_set[1]

    sum_pred = global_model.predict(x_test)

    # Read the metric once instead of once per branch; an unset metric ends
    # up in the accuracy fallback instead of raising a TypeError.
    metric = str(redis_get("metric") or "")
    if "acc" in metric:
        score = accuracy_score(y_test, sum_pred)
    elif "matth" in metric:
        score = matthews_corrcoef(y_test, sum_pred)
    elif "roc" in metric or "auc" in metric:
        score = roc_auc_score(y_test, sum_pred)
    else:
        score = accuracy_score(y_test, sum_pred)

    current_app.logger.info(
        f'[API] Combined AdaBoost classifier model score on local test data for {client_id}: {score}, Predictions: {sum_pred}'
    )
    redis_set('predictions', sum_pred)
    redis_set('score_combined', score)
    return score
def has_client_model_arrived():
    """
    Coordinator-side check whether every client has delivered its model.

    Compares the number of entries stored under 'global_data' with the value
    stored under 'nr_clients' and switches to the 'global_calculation' step
    when they match.

    :return: None
    """
    log = current_app.logger
    log.info(
        '[API] Coordinator checks if the models of all clients have arrived')

    received_models = redis_get('global_data')
    expected_clients = redis_get('nr_clients')
    log.info(f'[API] {len(received_models)}/{expected_clients}'
             f' clients have sent their models.')

    if len(received_models) != expected_clients:
        log.info('[API] The model of at least one client is still missing')
        return

    log.info('[API] The models of all clients have arrived')
    set_step('global_calculation')
def have_clients_finished():
    """
    Coordinator-side check whether every client has reported that it is done.

    Compares the number of entries stored under 'finished' with the value
    stored under 'nr_clients'.

    :return: True if all clients have finished, False otherwise
    """
    log = current_app.logger
    log.info('[API] Coordinator checks if all clients have finished')

    finished_flags = redis_get('finished')
    expected_clients = redis_get('nr_clients')
    log.info(f'[API] {len(finished_flags)}/{expected_clients}'
             f' clients have finished already.')

    if len(finished_flags) != expected_clients:
        log.info('[API] At least one client did not finish yet-')
        return False

    log.info('[API] All clients have finished.')
    return True
def get_location_id(city_name, cities):
    """
    Return the "id" of the city looked up by name through redis_get.

    The lookup key is built by city_key(); both arguments are handed on as
    keyword arguments for get_city_by_name.

    :param city_name: name of the city to look up
    :param cities: collection passed on to get_city_by_name
    :return: the "id" entry of the city, or None when the lookup yields
        nothing
    """
    # NOTE(review): 60 * 60 is presumably an expiry of one hour in seconds -
    # confirm against redis_get.
    lookup_arguments = dict(city_name=city_name, cities=cities)
    match = redis_get(city_key(city_name), 60 * 60, get_city_by_name,
                      lookup_arguments)
    return match["id"] if match else None
def calculate_global_model():
    """
    Merge the client models stored under 'global_data' into one global model.

    The first decoded model serves as the base; the `estimators_` of every
    further model are appended to it in list order. The merged model is
    stored jsonpickle-encoded under 'global_model'.

    :return: None
    """
    current_app.logger.info('[API] Combine all models')

    # NOTE(review): jsonpickle.decode can instantiate arbitrary objects; the
    # entries are sent by the clients, so they must be trusted.
    decoded_models = [
        jsonpickle.decode(encoded) for encoded in redis_get('global_data')
    ]

    combined = decoded_models[0]
    # NOTE(review): only `estimators_` is merged. If the models carry
    # per-estimator weights (as AdaBoost does), they are not extended here -
    # confirm that prediction still uses all merged estimators.
    for client_model in decoded_models[1:]:
        combined.estimators_ = combined.estimators_ + client_model.estimators_

    redis_set('global_model', jsonpickle.encode(combined))
def calculate_local_model():
    """
    Train and evaluate the local boosting model.

    Splits the data stored under 'files' into a stratified train and test
    set, stores the encoded test set under 'test_set', builds the model via
    build_model() and stores its score under 'score_single'.

    :return: the jsonpickle-encoded model, or None if no data is available
    """
    current_app.logger.info('[API] Perform local boosting')

    local_files = redis_get('files')
    if local_files is None:
        current_app.logger.info('[API] No data available')
        return None

    client_id = redis_get('id')
    dataset = set_X_y(local_files, label_col=redis_get("label_col"))
    features = dataset.get("data")
    labels = dataset.get("target")

    # Size of the test set and the seed of the split both come from the
    # configuration stored in redis.
    x_train, x_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=redis_get("test_size"),
        stratify=labels,
        random_state=redis_get("random_state"))

    # The test split is kept so the global model can be scored on it later.
    redis_set("test_set", jsonpickle.encode([x_test, y_test]))

    score, model = build_model(x_train, x_test, y_train, y_test)
    encoded_model = jsonpickle.encode(model)

    metric = redis_get("metric")
    current_app.logger.info(
        f'[API] Local AdaBoost classifier model {metric} {client_id}: {score} '
    )
    redis_set('score_single', score)
    return encoded_model
def read_config():
    """
    Load the 'fc_boosting' section of config.yml into redis.

    Reads INPUT_DIR/config.yml, copies the file names, the missing-data
    strategy and the model parameters into redis and logs the model
    parameters.

    :return: None
    """
    # NOTE(review): yaml.FullLoader accepts more than plain data; if the
    # config file can come from an untrusted source, yaml.safe_load should be
    # considered.
    with open(INPUT_DIR + '/config.yml') as config_file:
        settings = yaml.load(config_file,
                             Loader=yaml.FullLoader)['fc_boosting']

    # (redis key, config section, config option), stored in this order
    stored_values = (
        ('input_filename', 'files', 'input'),
        ('label_col', 'parameters', 'label_col'),
        ('output_filename', 'files', 'output'),
        ('missing_data', 'files', 'missing_data'),
        ('test_size', 'parameters', 'test_size'),
        ('n_estimators', 'parameters', 'n_estimators'),
        ('learning_rate', 'parameters', 'learning_rate'),
        ('random_state', 'parameters', 'random_state'),
        ('metric', 'parameters', 'metric'),
    )
    for redis_key, section, option in stored_values:
        redis_set(redis_key, settings[section][option])

    # Echo the model parameters as they were stored.
    logged_keys = ('label_col', 'test_size', 'n_estimators', 'learning_rate',
                   'random_state', 'metric')
    for redis_key in logged_keys:
        current_app.logger.info('[API] ' + redis_key + ': ' +
                                str(redis_get(redis_key)))
def data():
    """
    Exchange models and finish flags between coordinator and clients.

    POST request to /data stores the JSON body sent to this instance:
      - coordinator, step is not 'finalize': body['data'] (a client model)
        is appended to 'global_data'
      - coordinator, step is 'finalize': body['finished'] is appended to
        'finished'
      - client: body['global_model'] is stored under 'global_model' and the
        step is set to 'test_results'
    GET request to /data hands out what this instance offers and resets
    'available' to False:
      - client, step is not 'finalize': the local model
      - client, step is 'finalize': the finish flag; the step is set to
        'finished'
      - coordinator: the global model

    :return: GET request: JSON with key 'data', 'finished' or 'global_model'
             POST request (and any other method): JSON True
    """
    if request.method == 'POST':
        current_app.logger.info('[API] /data POST request')
        # Parse the body once; force=True ignores the request content type.
        payload = request.get_json(True)

        if not redis_get('is_coordinator'):
            # Get models from coordinator (as client)
            redis_set('global_model', payload['global_model'])
            set_step('test_results')
        elif get_step() != 'finalize':
            # Get local models of the clients (as coordinator)
            global_data = redis_get('global_data')
            global_data.append(payload['data'])
            redis_set('global_data', global_data)
        else:
            # Get finished flags of the clients (as coordinator)
            finish = redis_get('finished')
            finish.append(payload['finished'])
            redis_set('finished', finish)
        return jsonify(True)

    if request.method == 'GET':
        current_app.logger.info('[API] /data GET request')

        if redis_get('is_coordinator'):
            # Broadcast data to clients (as coordinator)
            redis_set('available', False)
            return jsonify({'global_model': redis_get('global_model')})

        if get_step() != 'finalize':
            # Send local model to the coordinator (as client)
            current_app.logger.info('[API] send model to coordinator')
            redis_set('available', False)
            return jsonify({'data': redis_get('local_data')})

        # Send finish flag to the coordinator (as client)
        current_app.logger.info('[API] send finish flag to coordinator')
        redis_set('available', False)
        set_step('finished')
        return jsonify({'finished': True})

    current_app.logger.info(
        '[API] Wrong request type, only GET and POST allowed')
    return jsonify(True)
def write_results(output_dir: str, model=None, score=None, plot=None):
    """
    Write the model, the evaluation scores and an optional plot to the
    output directory.

    :param output_dir: String of the output directory. Usually /mnt/output
    :param model: Object pickled to 'global_boosting_classifier.sav';
        skipped if None
    :param score: If not None, 'eval_on_local_testset.csv' is written. The
        value itself is not written: the scores of the local and the global
        model are read from the redis keys 'score_single' and
        'score_combined'
    :param plot: Object with a savefig(filename) method, saved as
        'plot.png'; skipped if None
    :return: None
    """
    current_app.logger.info("[API] Write results to output folder")

    if model is not None:
        # save the model to disk; the context manager closes the file even
        # when pickling fails
        filename = output_dir + '/' + 'global_boosting_classifier.sav'
        with open(filename, 'wb') as model_file:
            pickle.dump(model, model_file)

    if score is not None:
        filename = output_dir + '/eval_on_local_testset.csv'
        score_df = pd.DataFrame(
            index=["local_model", "global_model"],
            columns=[redis_get("metric")],
            data=[redis_get("score_single"),
                  redis_get("score_combined")])
        score_df.to_csv(filename)

    if plot is not None:
        # Like the other results the plot belongs into the output directory,
        # not into the current working directory.
        filename = output_dir + '/plot.png'
        plot.savefig(filename)
def params(): """ :return: current parameter values as a HTML page """ is_coordinator = redis_get('is_coordinator') step = redis_get('step') local_data = redis_get('local_data') global_data = redis_get('global_data') data = redis_get('data') available = redis_get('available') return f"""
def build_model(x_train, x_test, y_train, y_test):
    """
    Train an AdaBoost classifier and score it on the given test split.

    'n_estimators', 'learning_rate', 'random_state' and 'metric' are read
    from redis. A metric name containing "acc" selects accuracy, "matth" the
    Matthews correlation coefficient, "roc" or "auc" the ROC AUC; anything
    else (including an unset metric) falls back to accuracy.

    :param x_train: features of the training set
    :param x_test: features of the test set
    :param y_train: labels of the training set
    :param y_test: labels of the test set
    :return: tuple (score on the test set, fitted model)
    """
    learning_rate = float(redis_get("learning_rate"))

    # An unset seed means "no fixed seed". 0 is a valid seed and must not be
    # treated as unset by a plain truthiness test.
    random_state = redis_get("random_state")
    if random_state is False or (not random_state and random_state != 0):
        random_state = None

    # Create adaboost object
    classifier = AdaBoostClassifier(n_estimators=redis_get("n_estimators"),
                                    learning_rate=learning_rate,
                                    random_state=random_state)
    # Train Adaboost
    model = classifier.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # Read the metric once instead of once per branch.
    metric = str(redis_get("metric") or "")
    if "acc" in metric:
        score = accuracy_score(y_test, y_pred)
    elif "matth" in metric:
        score = matthews_corrcoef(y_test, y_pred)
    elif "roc" in metric or "auc" in metric:
        score = roc_auc_score(y_test, y_pred)
    else:
        score = accuracy_score(y_test, y_pred)
    return score, model
def status():
    """
    GET request to /status. Performs the work of the current step and, where
    a step is completed, switches to the next one.

    The 'available' value of the response is the value stored in redis at
    the beginning of the request, i.e. before the step handler below may
    have changed it. If True is returned, a GET data request will be sent.

    :return: JSON with key 'available' and value True or False and
        'finished' value True or False
    """
    # Read before any step handler runs; used for the response at the end.
    available = redis_get('available')
    current_app.logger.info('[API] /status GET request ' + str(available) +
                            ' - [STEP]: ' + str(get_step()))

    if get_step() == 'start':
        current_app.logger.info('[STEP] start')
        current_app.logger.info('[API] Federated Boosting App')

    elif get_step() == 'local_calculation':
        current_app.logger.info('[STEP] local_calculation')
        model = calculate_local_model()
        if redis_get('is_coordinator'):
            # if this is the coordinator, directly add the local model to the global_data list
            global_data = redis_get('global_data')
            global_data.append(model)
            redis_set('global_data', global_data)
            current_app.logger.info('[STEP] : waiting_for_clients')
        else:
            # if this is a client, set the local model to local_data and set available to true
            redis_set('local_data', model)
            current_app.logger.info('[STEP] waiting_for_coordinator')
            redis_set('available', True)
        # Coordinator and clients both continue with the waiting step
        set_step('waiting')

    elif get_step() == 'waiting':
        current_app.logger.info('[STEP] waiting')
        if redis_get('is_coordinator'):
            # check if all clients have sent their models already;
            # has_client_model_arrived() switches the step itself
            has_client_model_arrived()
        else:
            # the clients wait for the coordinator to finish
            current_app.logger.info(
                '[API] Client waiting for coordinator to finish')

    elif get_step() == 'global_calculation':
        # as soon as all data has arrived the global calculation starts
        current_app.logger.info('[STEP] global_calculation')
        calculate_global_model()
        set_step("broadcast_results")

    elif get_step() == 'broadcast_results':
        # the result is broadcasted to the clients
        current_app.logger.info('[STEP] broadcast_results')
        current_app.logger.info('[API] Share global results with clients')
        redis_set('available', True)
        set_step('test_results')

    elif get_step() == 'test_results':
        # score the global model on the local test set
        current_app.logger.info('[STEP] test_results')
        score = calculate_average()
        redis_set("score_of_global_model_on_local_test_set", score)
        current_app.logger.info('[API] Finalize client')
        set_step('write_results')

    elif get_step() == 'write_results':
        current_app.logger.info('[STEP] Write Results')
        write_results(
            output_dir=OUTPUT_DIR,
            model=redis_get("global_model"),
            score=redis_get("score_of_global_model_on_local_test_set"),
            plot=None)
        if redis_get('is_coordinator'):
            # The coordinator is already finished now
            redis_set('finished', [True])
        # Coordinator and clients continue with the finalize step
        set_step("finalize")

    elif get_step() == 'finalize':
        current_app.logger.info('[STEP] finalize')
        current_app.logger.info("[API] Finalize")
        if redis_get('is_coordinator'):
            # The coordinator waits until all clients have finished
            if have_clients_finished():
                current_app.logger.info('[API] Finalize coordinator.')
                set_step('finished')
            else:
                current_app.logger.info(
                    '[API] Not all clients have finished yet.')
        else:
            # The clients set available true to signal the coordinator that they have written the results.
            redis_set('available', True)

    elif get_step() == 'finished':
        # All clients and the coordinator set available to False and finished to True and the computation is done
        current_app.logger.info('[STEP] finished')
        return jsonify({'available': False, 'finished': True})

    # Every step except 'finished' reports the availability read at the top.
    return jsonify({
        'available': True if available else False,
        'finished': False
    })