Example #1
def run_without_column_excluding(model, datasets: list, datasets_names: list,
                                 thresholds: list, threshold_col_names: list,
                                 path_to_predictions: str,
                                 path_to_metrics: str) -> None:

    all_accuracy_list = []
    all_f1_score_list = []
    all_precision_list = []
    all_sensitivity_list = []
    all_specificity_list = []

    for idx, df in enumerate(datasets):
        features = df.drop(columns=['y'])
        target = df['y']
        result_df, y_test = run_model(model=model,
                                      features=features,
                                      target=target,
                                      thresholds=thresholds,
                                      threshold_col_names=threshold_col_names,
                                      test_size=0.3)

        accuracy_list, f1_score_list, precision_list, sensitivity_list, specificity_list = create_metrics(
            result_df, y_test, threshold_col_names)

        prediction_file_name = datasets_names[idx]
        save_prediction_df(result_df, prediction_file_name,
                           path_to_predictions)

        all_accuracy_list.append(accuracy_list)
        all_f1_score_list.append(f1_score_list)
        all_precision_list.append(precision_list)
        all_sensitivity_list.append(sensitivity_list)
        all_specificity_list.append(specificity_list)

    save_metrics(all_accuracy_list, all_f1_score_list, all_precision_list,
                 all_sensitivity_list, all_specificity_list,
                 threshold_col_names, path_to_metrics)
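The helpers run_model, create_metrics, save_prediction_df and save_metrics are defined elsewhere in the project. A minimal sketch of what run_model is assumed to do, inferred only from how it is called above (names and behaviour are assumptions, not the project's actual code):

# Hypothetical sketch of run_model -- illustrative only.
import pandas as pd
from sklearn.model_selection import train_test_split


def run_model(model, features, target, thresholds, threshold_col_names,
              test_size=0.3):
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=42)
    model.fit(x_train, y_train)
    # Positive-class probability, binarized at each requested cutoff
    y_proba = model.predict_proba(x_test)[:, 1]
    result_df = pd.DataFrame({'probability': y_proba}, index=y_test.index)
    for threshold, col_name in zip(thresholds, threshold_col_names):
        result_df[col_name] = (y_proba >= threshold).astype(int)
    return result_df, y_test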
Example #2
            client_id=client_id,
            client_secret=client_secret,
            authorize_url="https://accounts.google.com/o/oauth2/v2/auth",
            token_url="https://www.googleapis.com/oauth2/v4/token",
            on_login=on_google_login,
            on_error=on_google_error,
            scopes=['email', 'profile', 'openid']))

    app.add_routes([web.get("/", index), web.get("/auth/logout", logout)])
    return app


if __name__ == "__main__":
    client_id = os.getenv('CLIENT_ID')
    if not client_id:
        # A missing credential is fatal at startup, so log at error level
        logger.error('CLIENT_ID environment variable is missing')
        exit(1)
    client_secret = os.getenv('CLIENT_SECRET')
    if not client_secret:
        logger.error('CLIENT_SECRET environment variable is missing')
        exit(1)
    app = app_factory(client_id, client_secret)
    app.add_routes([
        web.view('/status', StatusView),
        web.view('/api/v1/fake1', Fake1View),
    ])

    app['metrics'] = create_metrics()
    start_http_server(8000)  # Prometheus metrics server
    web.run_app(app, host="0.0.0.0", port=80)
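The opening of app_factory is truncated above; the keyword set (client_id, client_secret, authorize_url, token_url, on_login, on_error, scopes) matches the oauth2_app factory from the aiohttp-oauth2 package, which is likely what the elided lines call. A minimal sketch of the handlers the snippet assumes -- every name and signature here is an assumption for illustration:

# Hypothetical handlers assumed by the snippet above -- illustrative only.
from aiohttp import web


async def index(request: web.Request) -> web.Response:
    return web.Response(text='Hello from the OAuth2 demo app')


async def logout(request: web.Request) -> web.Response:
    # Assumption: session state would be cleared here before redirecting home.
    raise web.HTTPFound('/')


async def on_google_login(request: web.Request, login_data) -> web.Response:
    # Assumed callback invoked after a successful token exchange;
    # login_data would carry the token response.
    raise web.HTTPFound('/')


async def on_google_error(request: web.Request) -> web.Response:
    return web.Response(text='Google login failed', status=401)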
Example #3
def run_with_hyperparameter_search_and_column_excluding(
        model, model_params: dict, scoring: str, datasets: list,
        datasets_names: list, thresholds: list, threshold_col_names: list,
        num_of_cols: int, path_to_predictions_col_excluding: str,
        path_to_metrics_col_excluding: str,
        path_to_model_params_col_excluding: str) -> None:

    all_accuracy_list = []
    all_f1_score_list = []
    all_precision_list = []
    all_sensitivity_list = []
    all_specificity_list = []

    # For each excluded column, tune a model per dataset and keep the best one
    for col_to_exclude in range(num_of_cols):

        max_best_model = None

        for idx, df in enumerate(datasets):

            col_name_to_exclude = f'x{col_to_exclude + 1}'
            features = df.drop(columns=[col_name_to_exclude, 'y'])
            target = df['y']

            x_train, x_test, y_train, y_test = train_test_split(
                features, target, test_size=0.3, random_state=42)

            # Preprocess data
            standard_scaler = StandardScaler()
            x_train_norm = standard_scaler.fit_transform(x_train)
            x_test_norm = standard_scaler.transform(x_test)

            # Convert ndarrays to DataFrames
            features_column_names = features.columns
            x_train = pd.DataFrame(data=x_train_norm,
                                   index=y_train.index,
                                   columns=features_column_names)
            x_test = pd.DataFrame(data=x_test_norm,
                                  index=y_test.index,
                                  columns=features_column_names)

            # Alternative (kept for reference): exhaustive grid search
            # clf = GridSearchCV(model, model_params, cv=10, verbose=0, n_jobs=-1)
            clf = RandomizedSearchCV(model,
                                     model_params,
                                     cv=5,
                                     n_iter=50,
                                     refit=True,
                                     verbose=0,
                                     n_jobs=-1,
                                     scoring=scoring)
            """
            tune_search = TuneGridSearchCV(
                model, model_params, refit=True, max_iters=10,
                use_gpu=True, scoring='f1', early_stopping=True, n_jobs=-1, local_dir='D:/ray_tune'
            )
            """
            best_model = clf.fit(x_train, y_train)

            # Save the best parameters into a CSV file
            best_params_df_name = f'{idx + 1}_{col_to_exclude}.csv'
            save_best_model_parameters(
                best_params_dict=best_model.best_params_,
                dataset_name=best_params_df_name,
                path=path_to_model_params_col_excluding)

            result_df, y_test = test_model(
                trained_model=best_model,
                x_test=x_test,
                y_test=y_test,
                thresholds=thresholds,
                threshold_col_names=threshold_col_names)

            # Keep the search with the best CV score seen so far
            if (max_best_model is None
                    or max_best_model.best_score_ < best_model.best_score_):
                max_best_model = best_model

            accuracy_list, f1_score_list, precision_list, sensitivity_list, specificity_list = create_metrics(
                result_df, y_test, threshold_col_names)

            # Actual depth vs. configured max_depth for each tree in the forest
            print('--- Tree depths of best model (actual-max_depth): ' + str([
                f'{est.get_depth()}-{est.max_depth}'
                for est in best_model.best_estimator_.estimators_
            ]))

            prediction_file_name = (datasets_names[idx].split('.')[0] +
                                    f'_{col_to_exclude}.csv')
            save_prediction_df(result_df, prediction_file_name,
                               path_to_predictions_col_excluding)

            all_accuracy_list.append(accuracy_list)
            all_f1_score_list.append(f1_score_list)
            all_precision_list.append(precision_list)
            all_sensitivity_list.append(sensitivity_list)
            all_specificity_list.append(specificity_list)

            print(f'Finished with dataset {idx + 1}, '
                  f'column excluded: {col_to_exclude}')

        max_depth = max(est.get_depth()
                        for est in max_best_model.best_estimator_.estimators_)
        print(f'Max depth of the deepest tree in the best model: {max_depth}')

        save_metrics(all_accuracy_list, all_f1_score_list, all_precision_list,
                     all_sensitivity_list, all_specificity_list,
                     threshold_col_names, path_to_metrics_col_excluding,
                     str(col_to_exclude))
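save_best_model_parameters is another project helper that is not shown. A minimal sketch of the assumed behaviour, i.e. writing the best_params_ dict of the finished search to a CSV file (the implementation is a guess, not the project's code):

# Hypothetical sketch of save_best_model_parameters -- illustrative only.
import os
import pandas as pd


def save_best_model_parameters(best_params_dict: dict, dataset_name: str,
                               path: str) -> None:
    os.makedirs(path, exist_ok=True)
    # One row per search; parameter names become the columns.
    pd.DataFrame([best_params_dict]).to_csv(
        os.path.join(path, dataset_name), index=False)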
Example #4
def run_with_hyperparameter_search_and_without_column_excluding(
        model, model_params: dict, scoring: str, datasets: list,
        datasets_names: list, thresholds: list, threshold_col_names: list,
        path_to_model_params: str, path_to_predictions: str,
        path_to_metrics: str) -> None:

    all_accuracy_list = []
    all_f1_score_list = []
    all_precision_list = []
    all_sensitivity_list = []
    all_specificity_list = []

    max_best_model = None

    for idx, df in enumerate(datasets):
        features = df.drop(columns=['y'])
        target = df['y']

        x_train, x_test, y_train, y_test = train_test_split(features,
                                                            target,
                                                            test_size=0.3,
                                                            random_state=42)

        # Preprocess data
        standard_scaler = StandardScaler()
        x_train_norm = standard_scaler.fit_transform(x_train)
        x_test_norm = standard_scaler.transform(x_test)

        # Convert ndarrays to DataFrames
        features_column_names = features.columns
        x_train = pd.DataFrame(data=x_train_norm,
                               index=y_train.index,
                               columns=features_column_names)
        x_test = pd.DataFrame(data=x_test_norm,
                              index=y_test.index,
                              columns=features_column_names)

        clf = RandomizedSearchCV(model,
                                 model_params,
                                 cv=5,
                                 n_iter=50,
                                 refit=True,
                                 verbose=0,
                                 n_jobs=-1,
                                 scoring=scoring)

        best_model = clf.fit(x_train, y_train)

        # Save the best parameters into a CSV file
        best_params_df_name = f'{idx + 1}.csv'

        save_best_model_parameters(best_params_dict=best_model.best_params_,
                                   dataset_name=best_params_df_name,
                                   path=path_to_model_params)

        # Predict outcomes
        result_df, y_test = test_model(trained_model=best_model,
                                       x_test=x_test,
                                       y_test=y_test,
                                       thresholds=thresholds,
                                       threshold_col_names=threshold_col_names)

        # Keep the search with the best CV score seen so far
        if (max_best_model is None
                or max_best_model.best_score_ < best_model.best_score_):
            max_best_model = best_model

        accuracy_list, f1_score_list, precision_list, sensitivity_list, specificity_list = create_metrics(
            result_df, y_test, threshold_col_names)

        # Actual depth vs. configured max_depth for each tree in the forest
        print('--- Tree depths of best model (actual-max_depth): ' + str([
            f'{est.get_depth()}-{est.max_depth}'
            for est in best_model.best_estimator_.estimators_
        ]))

        prediction_file_name = datasets_names[idx]
        save_prediction_df(result_df, prediction_file_name,
                           path_to_predictions)

        all_accuracy_list.append(accuracy_list)
        all_f1_score_list.append(f1_score_list)
        all_precision_list.append(precision_list)
        all_sensitivity_list.append(sensitivity_list)
        all_specificity_list.append(specificity_list)
        print(f'Finished with dataset {idx + 1}')

    max_depth = max(est.get_depth()
                    for est in max_best_model.best_estimator_.estimators_)
    print(f'Max depth of the deepest tree in the best model: {max_depth}')

    save_metrics(all_accuracy_list, all_f1_score_list, all_precision_list,
                 all_sensitivity_list, all_specificity_list,
                 threshold_col_names, path_to_metrics)
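test_model is also defined elsewhere; it evidently mirrors run_model but reuses an already-fitted estimator (here the refitted RandomizedSearchCV). A minimal sketch under that assumption -- not the project's actual code:

# Hypothetical sketch of test_model -- illustrative only.
import pandas as pd


def test_model(trained_model, x_test, y_test, thresholds,
               threshold_col_names):
    # RandomizedSearchCV with refit=True delegates predict_proba
    # to its best_estimator_.
    y_proba = trained_model.predict_proba(x_test)[:, 1]
    result_df = pd.DataFrame({'probability': y_proba}, index=y_test.index)
    for threshold, col_name in zip(thresholds, threshold_col_names):
        result_df[col_name] = (y_proba >= threshold).astype(int)
    return result_df, y_test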
Example #5
def run_with_column_excluding(model, num_of_cols: int, datasets: list,
                              datasets_names: list, thresholds: list,
                              threshold_col_names: list,
                              path_to_predictions_col_excluding: str,
                              path_to_metrics_col_excluding: str) -> None:

    all_accuracy_list = []
    all_f1_score_list = []
    all_precision_list = []
    all_sensitivity_list = []
    all_specificity_list = []

    for col_to_exclude in range(num_of_cols):
        for idx, df in enumerate(datasets):

            col_name_to_exclude = f'x{col_to_exclude + 1}'
            features = df.drop(columns=[col_name_to_exclude, 'y'])
            target = df['y']
            result_df, y_test = run_model(
                model=model,
                features=features,
                target=target,
                thresholds=thresholds,
                threshold_col_names=threshold_col_names,
                test_size=0.3)

            accuracy_list, f1_score_list, precision_list, sensitivity_list, specificity_list = create_metrics(
                result_df, y_test, threshold_col_names)

            prediction_file_name = (datasets_names[idx].split('.')[0] +
                                    f'_{col_to_exclude}.csv')
            save_prediction_df(result_df, prediction_file_name,
                               path_to_predictions_col_excluding)

            all_accuracy_list.append(accuracy_list)
            all_f1_score_list.append(f1_score_list)
            all_precision_list.append(precision_list)
            all_sensitivity_list.append(sensitivity_list)
            all_specificity_list.append(specificity_list)

        save_metrics(all_accuracy_list, all_f1_score_list, all_precision_list,
                     all_sensitivity_list, all_specificity_list,
                     threshold_col_names, path_to_metrics_col_excluding,
                     str(col_to_exclude))
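save_metrics is the last unseen helper. Judging by the call sites, it receives one list of per-threshold scores per run plus an optional suffix for the excluded column. A minimal sketch under those assumptions -- the file layout is a guess:

# Hypothetical sketch of save_metrics -- illustrative only.
import os
import pandas as pd


def save_metrics(accuracy, f1, precision, sensitivity, specificity,
                 threshold_col_names, path, suffix=''):
    os.makedirs(path, exist_ok=True)
    metric_tables = [('accuracy', accuracy), ('f1_score', f1),
                     ('precision', precision), ('sensitivity', sensitivity),
                     ('specificity', specificity)]
    for name, rows in metric_tables:
        # One row per run, one column per decision threshold
        df = pd.DataFrame(rows, columns=threshold_col_names)
        file_name = name + (f'_{suffix}' if suffix else '') + '.csv'
        df.to_csv(os.path.join(path, file_name), index=False)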
    metrics["beats"] = beats

    return metrics


if __name__ == "__main__":
    data_file = 'test_data1.csv'
    logging.config.fileConfig('logger_config.ini', disable_existing_loggers=False)
    data = csv_reader(data_file)
    time_array = data[0]
    voltage_array = data[1]
    validate(time_array, voltage_array)
    beats = beats_test(threshold=0.7, voltage_array=voltage_array,
                       time_array=time_array)
    num_beats = num_beats_test(threshold=0.7, voltage_array=voltage_array)
    # Renamed from "duration" to avoid shadowing the duration() function
    duration_s = duration(time_array=time_array)
    mean_heart_rate = mean_beats(threshold=0.7, voltage_array=voltage_array,
                                 time_array=time_array)
    # Renamed to avoid shadowing the voltage_extremes() function
    extremes = voltage_extremes(voltage_array=voltage_array)
    my_dictionary = create_metrics()
    my_dictionary = add_word(my_dictionary)
    logging.info(my_dictionary)
    convert_json(my_dictionary, f=data_file)
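convert_json is not shown either; from the call it appears to serialize the metrics dictionary to a JSON file named after the input CSV. A minimal sketch under that assumption:

# Hypothetical sketch of convert_json -- illustrative only.
import json


def convert_json(metrics: dict, f: str) -> None:
    out_name = f.rsplit('.', 1)[0] + '.json'
    with open(out_name, 'w') as fp:
        json.dump(metrics, fp, indent=4)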
Example #7
    def run_with_column_excluding(self,
                                  model,
                                  model_params=None,
                                  use_hyper_opt=False,
                                  scoring=None):

        if model_params is None:
            model_params = {}

        for col_to_exclude in self.feature_cols_idx:
            for filename in self.file_names:
                # Split data into features and target DataFrames
                tmp_df = self.df.loc[self.df["filename"] == filename]
                features = tmp_df.iloc[:, self.feature_cols_idx]
                target = tmp_df.iloc[:, self.target_col_idx]

                if not use_hyper_opt:
                    result_df = self._run_model(model=model,
                                                features=features,
                                                target=target,
                                                col_to_exclude=col_to_exclude)
                else:
                    clf = RandomizedSearchCV(model,
                                             model_params,
                                             cv=5,
                                             n_iter=50,
                                             refit=True,
                                             verbose=0,
                                             n_jobs=-1,
                                             scoring=scoring)

                    result_df = self._run_model(model=clf,
                                                features=features,
                                                target=target,
                                                col_to_exclude=col_to_exclude,
                                                use_hyper_opt=True)

                accuracy_list, f1_score_list, precision_list, sensitivity_list, specificity_list = create_metrics(
                    result_df, self.y_test, self.threshold_col_names)

                # Save the "generated" prediction DataFrame
                prediction_file_name = (filename.split('.')[0] +
                                        f'_{col_to_exclude}.csv')
                save_prediction_df(result_df, prediction_file_name,
                                   self.path_to_predictions_col_excluding)

                self.all_accuracy_list.append(accuracy_list)
                self.all_f1_score_list.append(f1_score_list)
                self.all_precision_list.append(precision_list)
                self.all_sensitivity_list.append(sensitivity_list)
                self.all_specificity_list.append(specificity_list)

                print(f'Finished with dataset {filename}, '
                      f'column excluded: {col_to_exclude}')

            # Save all the stored evaluation metrics to the given path
            save_metrics(self.all_accuracy_list, self.all_f1_score_list,
                         self.all_precision_list, self.all_sensitivity_list,
                         self.all_specificity_list, self.threshold_col_names,
                         self.path_to_metrics_col_excluding,
                         str(col_to_exclude))
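The enclosing class is not shown above, so a hypothetical invocation has to assume its constructor. A sketch of how this method would likely be called (ModelRunner, df and file_names are invented placeholders for illustration):

# Hypothetical usage -- class name and constructor are assumptions.
from sklearn.ensemble import RandomForestClassifier

runner = ModelRunner(df=df, file_names=file_names)  # assumed constructor
runner.run_with_column_excluding(
    model=RandomForestClassifier(random_state=42),
    model_params={'n_estimators': [100, 300], 'max_depth': [5, 10, None]},
    use_hyper_opt=True,
    scoring='f1')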