def run_without_column_excluding(model, datasets: list, datasets_names: list,
                                 thresholds: list, threshold_col_names: list,
                                 path_to_predictions: str,
                                 path_to_metrics: str) -> None:
    all_accuracy_list = []
    all_f1_score_list = []
    all_precision_list = []
    all_sensitivity_list = []
    all_specificity_list = []
    for idx, df in enumerate(datasets):
        # Split data into features and target
        features = df.drop(columns=['y'])
        target = df['y']
        result_df, y_test = run_model(model=model,
                                      features=features,
                                      target=target,
                                      thresholds=thresholds,
                                      threshold_col_names=threshold_col_names,
                                      test_size=0.3)
        accuracy_list, f1_score_list, precision_list, sensitivity_list, \
            specificity_list = create_metrics(result_df, y_test,
                                              threshold_col_names)
        prediction_file_name = datasets_names[idx]
        save_prediction_df(result_df, prediction_file_name,
                           path_to_predictions)
        all_accuracy_list.append(accuracy_list)
        all_f1_score_list.append(f1_score_list)
        all_precision_list.append(precision_list)
        all_sensitivity_list.append(sensitivity_list)
        all_specificity_list.append(specificity_list)
    save_metrics(all_accuracy_list, all_f1_score_list, all_precision_list,
                 all_sensitivity_list, all_specificity_list,
                 threshold_col_names, path_to_metrics)
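# A minimal usage sketch for run_without_column_excluding. The synthetic
# DataFrame, the forest-style classifier, and the output paths below are
# illustrative assumptions, not part of the original pipeline; any estimator
# accepted by run_model should work.
def _demo_run_without_column_excluding():
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    rng = np.random.default_rng(42)
    demo_df = pd.DataFrame(rng.random((200, 4)),
                           columns=['x1', 'x2', 'x3', 'y'])
    demo_df['y'] = (demo_df['y'] > 0.5).astype(int)  # binarize the target
    run_without_column_excluding(
        model=RandomForestClassifier(random_state=42),
        datasets=[demo_df],
        datasets_names=['demo.csv'],
        thresholds=[0.3, 0.5, 0.7],
        threshold_col_names=['pred_0.3', 'pred_0.5', 'pred_0.7'],
        path_to_predictions='predictions/',
        path_to_metrics='metrics/')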
        client_id=client_id,
        client_secret=client_secret,
        authorize_url="https://accounts.google.com/o/oauth2/v2/auth",
        token_url="https://www.googleapis.com/oauth2/v4/token",
        on_login=on_google_login,
        on_error=on_google_error,
        scopes=['email', 'profile', 'openid']))
    app.add_routes([web.get("/", index), web.get("/auth/logout", logout)])
    return app


if __name__ == "__main__":
    client_id = os.getenv('CLIENT_ID')
    if not client_id:
        logger.error('CLIENT_ID environment variable is missing')
        exit(1)
    client_secret = os.getenv('CLIENT_SECRET')
    if not client_secret:
        logger.error('CLIENT_SECRET environment variable is missing')
        exit(1)
    app = app_factory(client_id, client_secret)
    app.add_routes([
        web.view('/status', StatusView),
        web.view('/api/v1/fake1', Fake1View),
    ])
    app['metrics'] = create_metrics()
    start_http_server(8000)  # Prometheus metrics server
    web.run_app(app, host="0.0.0.0", port=80)
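# create_metrics() above is defined elsewhere; a minimal sketch of what it
# might return, assuming the prometheus_client library that
# start_http_server(8000) already implies. The function name and the metric
# names here are invented for illustration.
def create_metrics_sketch():
    from prometheus_client import Counter, Histogram
    return {
        # Monotonic counter, incremented once per handled request
        'requests_total': Counter(
            'requests_total', 'Total number of handled requests'),
        # Latency distribution, observed per request in seconds
        'request_latency_seconds': Histogram(
            'request_latency_seconds', 'Request latency in seconds'),
    }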
def run_with_hyperparameter_search_and_column_excluding(
        model, model_params: dict, scoring: str, datasets: list,
        datasets_names: list, thresholds: list, threshold_col_names: list,
        num_of_cols: int, path_to_predictions_col_excluding: str,
        path_to_metrics_col_excluding: str,
        path_to_model_params_col_excluding: str) -> None:
    all_accuracy_list = []
    all_f1_score_list = []
    all_precision_list = []
    all_sensitivity_list = []
    all_specificity_list = []
    # Predict on every dataset separately, once per excluded column, using
    # the best model found by the hyperparameter search
    for col_to_exclude in range(num_of_cols):
        max_best_model = None
        for idx, df in enumerate(datasets):
            col_name_to_exclude = 'x' + str(col_to_exclude + 1)
            features = df.drop(columns=[col_name_to_exclude, 'y'])
            target = df['y']
            x_train, x_test, y_train, y_test = train_test_split(
                features, target, test_size=0.3, random_state=42)

            # Standardize features; fit the scaler on the training split only
            standard_scaler = StandardScaler()
            x_train_norm = standard_scaler.fit_transform(x_train)
            x_test_norm = standard_scaler.transform(x_test)

            # Convert ndarrays back to DataFrames
            features_column_names = features.columns
            x_train = pd.DataFrame(data=x_train_norm,
                                   index=y_train.index,
                                   columns=features_column_names)
            x_test = pd.DataFrame(data=x_test_norm,
                                  index=y_test.index,
                                  columns=features_column_names)

            # Alternative: exhaustive search
            # clf = GridSearchCV(model, model_params, cv=10, verbose=0,
            #                    n_jobs=-1)
            clf = RandomizedSearchCV(model, model_params, cv=5, n_iter=50,
                                     refit=True, verbose=0, n_jobs=-1,
                                     scoring=scoring)
            # Alternative: Ray Tune backed search
            # tune_search = TuneGridSearchCV(model, model_params, refit=True,
            #                                max_iters=10, use_gpu=True,
            #                                scoring='f1', early_stopping=True,
            #                                n_jobs=-1, local_dir='D:/ray_tune')
            best_model = clf.fit(x_train, y_train)

            # Save the best parameters into a csv file
            best_params_df_name = (str(idx + 1) + '_' + str(col_to_exclude)
                                   + '.csv')
            save_best_model_parameters(
                best_params_dict=best_model.best_params_,
                dataset_name=best_params_df_name,
                path=path_to_model_params_col_excluding)

            result_df, y_test = test_model(
                trained_model=best_model,
                x_test=x_test,
                y_test=y_test,
                thresholds=thresholds,
                threshold_col_names=threshold_col_names)

            # Keep the best-scoring model across the datasets
            if max_best_model is None or \
                    max_best_model.best_score_ < best_model.best_score_:
                max_best_model = best_model

            accuracy_list, f1_score_list, precision_list, sensitivity_list, \
                specificity_list = create_metrics(result_df, y_test,
                                                  threshold_col_names)
            print('---Depths of the trees of the best model (actual-limit): '
                  + str([str(est.get_depth()) + '-' + str(est.max_depth)
                         for est in best_model.best_estimator_.estimators_]))
            prediction_file_name = (datasets_names[idx].split('.')[0] + '_'
                                    + str(col_to_exclude) + '.csv')
            save_prediction_df(result_df, prediction_file_name,
                               path_to_predictions_col_excluding)
            all_accuracy_list.append(accuracy_list)
            all_f1_score_list.append(f1_score_list)
            all_precision_list.append(precision_list)
            all_sensitivity_list.append(sensitivity_list)
            all_specificity_list.append(specificity_list)
            print('Finished with dataset ' + str(idx + 1)
                  + ', column excluded: ' + str(col_to_exclude))
        print('Max depth among the trees of the best model: '
              + str(max(est.get_depth()
                        for est in
                        max_best_model.best_estimator_.estimators_)))
        save_metrics(all_accuracy_list, all_f1_score_list, all_precision_list,
                     all_sensitivity_list, all_specificity_list,
                     threshold_col_names, path_to_metrics_col_excluding,
                     str(col_to_exclude))
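# A hedged example of a model_params search space for the RandomizedSearchCV
# call above. The forest-style estimator and the exact distributions are
# assumptions (best_estimator_.estimators_ and get_depth() in the prints
# suggest an ensemble of decision trees); the function name is hypothetical.
def example_forest_search_space():
    from scipy.stats import randint
    return {
        'n_estimators': randint(50, 300),      # number of trees
        'max_depth': [None, 5, 10, 20],        # tree depth cap
        'min_samples_split': randint(2, 11),   # min samples to split a node
        'max_features': ['sqrt', 'log2'],      # features considered per split
    }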
def run_with_hyperparameter_search_and_without_column_excluding(
        model, model_params: dict, scoring: str, datasets: list,
        datasets_names: list, thresholds: list, threshold_col_names: list,
        path_to_model_params: str, path_to_predictions: str,
        path_to_metrics: str) -> None:
    all_accuracy_list = []
    all_f1_score_list = []
    all_precision_list = []
    all_sensitivity_list = []
    all_specificity_list = []
    max_best_model = None
    for idx, df in enumerate(datasets):
        features = df.drop(columns=['y'])
        target = df['y']
        x_train, x_test, y_train, y_test = train_test_split(
            features, target, test_size=0.3, random_state=42)

        # Standardize features; fit the scaler on the training split only
        standard_scaler = StandardScaler()
        x_train_norm = standard_scaler.fit_transform(x_train)
        x_test_norm = standard_scaler.transform(x_test)

        # Convert ndarrays back to DataFrames
        features_column_names = features.columns
        x_train = pd.DataFrame(data=x_train_norm,
                               index=y_train.index,
                               columns=features_column_names)
        x_test = pd.DataFrame(data=x_test_norm,
                              index=y_test.index,
                              columns=features_column_names)

        clf = RandomizedSearchCV(model, model_params, cv=5, n_iter=50,
                                 refit=True, verbose=0, n_jobs=-1,
                                 scoring=scoring)
        best_model = clf.fit(x_train, y_train)

        # Save the best parameters into a csv file
        best_params_df_name = str(idx + 1) + '.csv'
        save_best_model_parameters(best_params_dict=best_model.best_params_,
                                   dataset_name=best_params_df_name,
                                   path=path_to_model_params)

        # Predict outcomes
        result_df, y_test = test_model(
            trained_model=best_model,
            x_test=x_test,
            y_test=y_test,
            thresholds=thresholds,
            threshold_col_names=threshold_col_names)

        # Keep the best-scoring model across the datasets
        if max_best_model is None or \
                max_best_model.best_score_ < best_model.best_score_:
            max_best_model = best_model

        accuracy_list, f1_score_list, precision_list, sensitivity_list, \
            specificity_list = create_metrics(result_df, y_test,
                                              threshold_col_names)
        print('---Depths of the trees of the best model (actual-limit): '
              + str([str(est.get_depth()) + '-' + str(est.max_depth)
                     for est in best_model.best_estimator_.estimators_]))
        prediction_file_name = datasets_names[idx]
        save_prediction_df(result_df, prediction_file_name,
                           path_to_predictions)
        all_accuracy_list.append(accuracy_list)
        all_f1_score_list.append(f1_score_list)
        all_precision_list.append(precision_list)
        all_sensitivity_list.append(sensitivity_list)
        all_specificity_list.append(specificity_list)
        print('Finished with dataset ' + str(idx + 1))
    print('Max depth among the trees of the best model: '
          + str(max(est.get_depth()
                    for est in max_best_model.best_estimator_.estimators_)))
    save_metrics(all_accuracy_list, all_f1_score_list, all_precision_list,
                 all_sensitivity_list, all_specificity_list,
                 threshold_col_names, path_to_metrics)
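# test_model() is defined elsewhere; a minimal sketch of the thresholding it
# presumably applies, assuming the fitted model exposes predict_proba and
# each entry of thresholds maps to one column in threshold_col_names. The
# helper name is hypothetical.
def apply_thresholds_sketch(trained_model, x_test, thresholds,
                            threshold_col_names):
    proba = trained_model.predict_proba(x_test)[:, 1]  # P(y = 1) per row
    result_df = pd.DataFrame(index=x_test.index)
    for threshold, col_name in zip(thresholds, threshold_col_names):
        # Label as positive wherever the probability clears the threshold
        result_df[col_name] = (proba >= threshold).astype(int)
    return result_df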
def run_with_column_excluding(model, num_of_cols: int, datasets: list,
                              datasets_names: list, thresholds: list,
                              threshold_col_names: list,
                              path_to_predictions_col_excluding: str,
                              path_to_metrics_col_excluding: str) -> None:
    all_accuracy_list = []
    all_f1_score_list = []
    all_precision_list = []
    all_sensitivity_list = []
    all_specificity_list = []
    for col_to_exclude in range(num_of_cols):
        for idx, df in enumerate(datasets):
            col_name_to_exclude = 'x' + str(col_to_exclude + 1)
            features = df.drop(columns=[col_name_to_exclude, 'y'])
            target = df['y']
            result_df, y_test = run_model(
                model=model,
                features=features,
                target=target,
                thresholds=thresholds,
                threshold_col_names=threshold_col_names,
                test_size=0.3)
            accuracy_list, f1_score_list, precision_list, sensitivity_list, \
                specificity_list = create_metrics(result_df, y_test,
                                                  threshold_col_names)
            prediction_file_name = (datasets_names[idx].split('.')[0] + '_'
                                    + str(col_to_exclude) + '.csv')
            save_prediction_df(result_df, prediction_file_name,
                               path_to_predictions_col_excluding)
            all_accuracy_list.append(accuracy_list)
            all_f1_score_list.append(f1_score_list)
            all_precision_list.append(precision_list)
            all_sensitivity_list.append(sensitivity_list)
            all_specificity_list.append(specificity_list)
        save_metrics(all_accuracy_list, all_f1_score_list, all_precision_list,
                     all_sensitivity_list, all_specificity_list,
                     threshold_col_names, path_to_metrics_col_excluding,
                     str(col_to_exclude))
metrics["beats"] = beats return metrics if __name__ == "__main__": data_file = 'test_data1.csv' logging.config.fileConfig('logger_config.ini', disable_existing_loggers=False) data = csv_reader(data_file) time_array = data[0] voltage_array = data[1] validate(time_array, voltage_array) beats = beats_test(threshold=0.7, voltage_array=voltage_array, time_array=time_array ) num_beats = num_beats_test(threshold=0.7, voltage_array=voltage_array) duration = duration(time_array=time_array) mean_heart_rate = mean_beats(threshold=0.7, voltage_array=voltage_array, time_array=time_array ) voltage_extremes = voltage_extremes(voltage_array=voltage_array) my_dictionary = create_metrics() my_dictionary = add_word(my_dictionary) logging.info(my_dictionary) convert_json(my_dictionary, f=data_file)
def run_with_column_excluding(self, model, model_params=None,
                              use_hyper_opt=False, scoring=None):
    if model_params is None:
        model_params = {}
    for col_to_exclude in self.feature_cols_idx:
        for filename in self.file_names:
            # Split data into features and target DataFrames
            tmp_df = self.df.loc[self.df["filename"] == filename]
            features = tmp_df.iloc[:, self.feature_cols_idx]
            target = tmp_df.iloc[:, self.target_col_idx]
            if not use_hyper_opt:
                result_df = self._run_model(model=model,
                                            features=features,
                                            target=target,
                                            col_to_exclude=col_to_exclude)
            else:
                clf = RandomizedSearchCV(model, model_params, cv=5,
                                         n_iter=50, refit=True, verbose=0,
                                         n_jobs=-1, scoring=scoring)
                result_df = self._run_model(model=clf,
                                            features=features,
                                            target=target,
                                            col_to_exclude=col_to_exclude,
                                            use_hyper_opt=True)
            accuracy_list, f1_score_list, precision_list, sensitivity_list, \
                specificity_list = create_metrics(result_df, self.y_test,
                                                  self.threshold_col_names)

            # Save the "generated" prediction DataFrame
            prediction_file_name = (filename.split('.')[0] + '_'
                                    + str(col_to_exclude) + '.csv')
            save_prediction_df(result_df, prediction_file_name,
                               self.path_to_predictions_col_excluding)
            self.all_accuracy_list.append(accuracy_list)
            self.all_f1_score_list.append(f1_score_list)
            self.all_precision_list.append(precision_list)
            self.all_sensitivity_list.append(sensitivity_list)
            self.all_specificity_list.append(specificity_list)
            print('Finished with ' + filename + ' dataset, column excluded: '
                  + str(col_to_exclude))
        # Save all the stored evaluation metrics to the given path
        save_metrics(self.all_accuracy_list, self.all_f1_score_list,
                     self.all_precision_list, self.all_sensitivity_list,
                     self.all_specificity_list, self.threshold_col_names,
                     self.path_to_metrics_col_excluding,
                     str(col_to_exclude))
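# A hedged usage sketch for the method above. The class name
# ExperimentRunner and its constructor arguments are hypothetical; the
# class definition itself is not shown in this file, so the call is left
# commented out.
#
# from sklearn.ensemble import RandomForestClassifier
#
# runner = ExperimentRunner(...)  # construction not shown here
# runner.run_with_column_excluding(
#     model=RandomForestClassifier(random_state=42),
#     model_params={'max_depth': [None, 5, 10]},
#     use_hyper_opt=True,
#     scoring='f1')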