def test_datasource_classifier_unpicklable(self):
        """Expect InvalidConfigValueException for an unloadable classifier file.

        Points ``datasource_classifier`` at ``test_models/unpicklable_model.txt``
        (presumably a file that cannot be unpickled, judging by its name) and
        checks that ``mp.load_classifier`` rejects it.
        """
        broken_config = config.copy()
        broken_config['datasource_classifier'] = "test_models/unpicklable_model.txt"
        model_type = self.current_model_type_std[0]

        self.assertRaises(ex.InvalidConfigValueException, mp.load_classifier,
                          broken_config, model_type)
    def test_datasource_classifier_is_wrong(self):
        """Expect InvalidConfigValueException for a bogus classifier path.

        ``datasource_classifier`` is set to ``i_am_not_an_existing_file.yeah``
        (presumably a path that does not exist) before calling
        ``mp.load_classifier``.
        """
        bogus_path = "i_am_not_an_existing_file.yeah"
        missing_file_config = config.copy()
        missing_file_config['datasource_classifier'] = bogus_path
        event_type = self.current_model_type_std[0]

        with self.assertRaises(ex.InvalidConfigValueException):
            mp.load_classifier(missing_file_config, event_type)
    def test_current_model_type_does_not_exist(self):
        """Expect InvalidConfigKeyException for an unknown model type.

        The config points at ``test_models/model_returns_code_0.txt`` and the
        requested model type is the made-up name ``Ich_bin_kein_event``.
        """
        unknown_model_type = "Ich_bin_kein_event"
        test_config = copy.deepcopy(config)
        test_config['datasource_classifier'] = "test_models/model_returns_code_0.txt"

        self.assertRaises(ex.InvalidConfigKeyException, mp.load_classifier,
                          test_config, unknown_model_type)
    def test_model_at_event_is_empty_returns_KNC(self):
        """Check that load_classifier returns a model of the same type as ``self.model``.

        The config points at
        ``test_models/model_test_returns_not_existing_classifier.txt``; judging
        by the test name this fixture presumably holds no model for the
        requested event (TODO confirm against the fixture).
        """
        test_config = config.copy()
        test_config['datasource_classifier'] = (
            "test_models/model_test_returns_not_existing_classifier.txt")

        tested_model = mp.load_classifier(test_config,
                                          self.current_model_type_std[0])

        # assertIsInstance reports the offending object and expected type on
        # failure, whereas assertTrue(isinstance(...)) only says "False is not true".
        self.assertIsInstance(tested_model, type(self.model))
    def test_for_create_new_classifier_true(self):
        """Check the returned model type when ``create_new_classifier`` is set.

        ``create_new_classifier`` is set to the string ``"True"`` and
        ``mp.load_classifier`` is called with a third positional argument of
        ``True`` (its meaning is defined by load_classifier, not visible here).
        The result must have the same type as ``self.model``.
        """
        test_config = copy.deepcopy(config)
        test_config["create_new_classifier"] = "True"

        loaded_model = mp.load_classifier(test_config,
                                          self.current_model_type_std[0], True)

        # assertIsInstance gives a descriptive failure message instead of
        # the opaque "False is not true" of assertTrue(isinstance(...)).
        self.assertIsInstance(loaded_model, type(self.model))
 def test_returns_existing_classifier(self):
     """Check that a model loaded from the existing-classifier fixture has the type of ``self.model``.

     The config points at
     ``test_models/model_test_returns_existing_classifier.txt``.
     """
     classification_config = config.copy()
     classification_config['datasource_classifier'] = (
         "test_models/model_test_returns_existing_classifier.txt")

     loaded_model = mp.load_classifier(classification_config,
                                       self.current_model_type_std[0])

     # assertIsInstance names the actual and expected types when it fails.
     self.assertIsInstance(loaded_model, type(self.model))
    def test_new_classifer_method_is_not_empty(self):
        """Check the returned model type when ``new_classifier_method`` is ``"kNN"``.

        The config points at ``test_models/model_returns_code_0.txt``; the
        model returned by ``mp.load_classifier`` must have the same type as
        ``self.model``.
        """
        test_config = config.copy()
        test_config['datasource_classifier'] = "test_models/model_returns_code_0.txt"
        test_config['new_classifier_method'] = "kNN"

        test_model = mp.load_classifier(test_config,
                                        self.current_model_type_std[0])

        # assertIsInstance produces a readable failure message with both types.
        self.assertIsInstance(test_model, type(self.model))
    def test_for_existing_models_in_dictionary(self):
        """Persist ``self.model`` and check that loading it back yields the same type.

        Uses ``test_models/model_returns_code_0.txt`` as the classifier store,
        ``selected_event`` set to ``warmwasseraufbereitung`` and the second
        entry of ``self.current_model_type_std`` as the model type.
        """
        model_type = self.current_model_type_std[1]
        test_config = config.copy()
        test_config['datasource_classifier'] = "test_models/model_returns_code_0.txt"
        test_config['selected_event'] = "warmwasseraufbereitung"

        # NOTE(review): this writes into the fixture file used by other tests.
        mp.persist_classifier(self.model, test_config, model_type)
        existing_model = mp.load_classifier(test_config, model_type)

        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(existing_model, type(self.model))
    def test_returns_not_existing_classifier(self):
        """Check that load_classifier returns something truthy when the stored model is empty.

        The entry ``abtauzyklus`` of the classifier dictionary in
        ``test_models/model_test_returns_existing_classifier.txt`` is replaced
        by an empty string before ``mp.load_classifier`` is called.

        The fixture file is shared with other tests, so its original content
        is written back afterwards to keep the tests independent of their
        execution order.
        """
        fixture_path = "test_models/model_test_returns_existing_classifier.txt"
        test_config = copy.deepcopy(config)
        test_config["datasource_classifier"] = fixture_path

        classifier_dictionary = mp.load_dictionary(fixture_path)
        # A shallow snapshot suffices: only a top-level key is reassigned below.
        original_dictionary = copy.copy(classifier_dictionary)
        classifier_dictionary["abtauzyklus"] = ""

        try:
            mp.save_dictionary(classifier_dictionary, fixture_path)
            self.assertTrue(
                mp.load_classifier(test_config,
                                   self.current_model_type_std[0]))
        finally:
            # Restore the shared fixture even if the assertion fails.
            mp.save_dictionary(original_dictionary, fixture_path)
    def test_model_for_event_is_not_instance_of_sklearn(self):
        """Check that a non-model entry is not returned as a model of ``self.model``'s type.

        A copy of the dictionary from ``test_models/model_returns_code_0.txt``
        gets the string ``"Ich bin kein richtiges Model"`` stored under
        ``abtauzyklus`` and is saved to a separate file, which the config then
        points at.
        """
        path_to_wrong_dictionary = "test_models/model_returns_code_0_contains_wrong_model_at_abauzyklus.txt"
        classifier_dictionary = mp.load_dictionary(
            "test_models/model_returns_code_0.txt")
        classifier_dictionary['abtauzyklus'] = "Ich bin kein richtiges Model"
        mp.save_dictionary(classifier_dictionary, path_to_wrong_dictionary)

        wrong_config = config.copy()
        wrong_config['datasource_classifier'] = path_to_wrong_dictionary

        test_model = mp.load_classifier(wrong_config,
                                        self.current_model_type_std[0])

        # assertNotIsInstance reports the offending object on failure instead
        # of the opaque "True is not false" from assertFalse(isinstance(...)).
        self.assertNotIsInstance(test_model, type(self.model))
    def test_datasource_classifier_none(self):
        """Expect InvalidConfigValueException when ``datasource_classifier`` is ``None``."""
        config_without_source = config.copy()
        config_without_source["datasource_classifier"] = None
        model_type = self.current_model_type_std[0]

        self.assertRaises(ex.InvalidConfigValueException, mp.load_classifier,
                          config_without_source, model_type)
    def test_datasource_classifer_deleted(self):
        """Expect InvalidConfigKeyException when ``datasource_classifier`` is missing from the config."""
        incomplete_config = copy.deepcopy(config)
        del incomplete_config['datasource_classifier']
        model_type = self.current_model_type_std[0]

        self.assertRaises(ex.InvalidConfigKeyException, mp.load_classifier,
                          incomplete_config, model_type)
 def test_wrong_structure_config(self):
     """Expect ConfigTypeException when a plain string is passed as the config."""
     not_a_config = "Hallo"
     model_type = self.current_model_type_std[0]

     self.assertRaises(ex.ConfigTypeException, mp.load_classifier,
                       not_a_config, model_type)
def train_classifier(config):
    """
    Name in documentation: klassifizierer_trainieren()

    Train a classifier for every configured event and persist the ones that score well enough.

    The marked data of ``selected_event`` is queried for the configured time frame. For each
    event the data is cut to the span given by ``markers[event]``, split into a train and a
    test set, fitted, evaluated against ``required_score[event]`` and persisted if the
    evaluation succeeds.

    :param config: Contains parameters for training the classifier (read via get_config_parameter)
    :raises InvalidConfigValueException: Raised if the config parameters cannot be read or the
        time frame cannot be converted
    :raises ConfigException: Raised if the queried data is empty or does not cover the marked
        time span of an event
    :raises PersistorException: Raised if loading the classifier for an event fails
    :raises SklearnException: Raised if splitting the data or fitting the classifier fails
    :return int: Status code 0 on success; failures are signalled by the exceptions above
    """
    try:
        (selected_event, required_score, test_size, datasource_marked_data,
         start_time, end_time, events) = get_config_parameter(config)
    except Exception as e:
        # Keep the original message and cause, like the other handlers below.
        raise exce.InvalidConfigValueException(str(e)) from e
    logger.info("config parameter loaded")

    try:
        start = convert_time(start_time)
        end = convert_time(end_time)
    except Exception as e:
        raise exce.InvalidConfigValueException(str(e)) from e

    # NOTE(review): the query is assembled by string interpolation from config
    # values; confirm that selected_event comes from a trusted source.
    df = read_manager.read_query(
        datasource_marked_data,
        f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
    )

    # Label columns of all events must not end up in the feature matrix.
    # The list is the same for every event, so build it once.
    label_columns = [
        column for name in events for column in (name, f"{name}_marker")
    ]

    for event in events:
        # NOTE(review): `markers` is not defined in this function; it has to be
        # provided at module level.
        end_start = markers[event]
        start_event = list(end_start)[0]
        end_event = list(end_start.values())[-1]

        # An empty result cannot cover the marked span; report it as a config
        # problem instead of failing with an IndexError on df.index[0].
        # NOTE(review): the bounds are compared as strings - confirm that the
        # marker timestamps use the same textual format as str(df.index[...]).
        if (df.empty or str(df.index[0]) > start_event
                or str(df.index[-1]) < end_event):
            raise exce.ConfigException(
                'time frame of trainingsdata not in selected data frame included'
            )
        # Slice first, then copy: only the rows of this event are duplicated.
        df_copy = df[start_event:end_event].copy()

        try:
            classifier = model_persistor.load_classifier(config, event, True)
        except Exception as e:
            raise exce.PersistorException(str(e)) from e
        logger.info("model loaded")

        df_copy.dropna(inplace=True)
        y = np.array(df_copy[event])
        X = df_copy.drop(labels=label_columns, axis=1).to_numpy()

        try:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size)
            classifier = classifier.fit(X_train, y_train)
        except Exception as e:
            raise exce.SklearnException(str(e)) from e
        logger.info("model trained")

        if evaluate_classifier(classifier, required_score[event], X_test,
                               y_test):
            model_persistor.persist_classifier(classifier, config, event)
            logger.info('model persisted')
        else:
            logger.info('score too low, model not persisted')
    return 0
# Example no. 15
# 0
def apply_classifier(config):
    """
    Name in documentation: klassifizierer_anwenden()

    Marks the occurrences of the configured events in the data with the use of the classifiers.

    The data is enriched first, then the enriched data and the raw (or, for the
    selected event ``'pred'``, the predicted) data are queried for the configured
    time frame. For every event the persisted model predicts on the enriched data
    and the prediction is stored as a column of the raw data, which is finally written
    to the classified-data datasource.

    :param config: Dictionary from which the parameters will be extracted

    :raises ConfigTypeException: if config is not a dictionary
    :raises InvalidConfigValueException: if the time frame cannot be converted
    :raises DBException: if reading from or writing to the database fails
    :raises SKLearnException: if there are problems with Sklearn methods

    :return int: Status code 0 on success; failures are signalled by the exceptions above
    """

    logger.info("starting classification...")
    if not isinstance(config, dict):
        raise ex.ConfigTypeException(
            "Wrong data structure of configuration: " + str(config))

    trainingsdata_editing_engine.enrich_data(config)
    logger.info("data enriched...")

    # measurement_enriched is part of the returned tuple but not needed here.
    (datasource_enriched_data, datasource_classified_data, timeframe,
     selected_event, measurement_classified, datasource_raw_data,
     measurement_raw, events, _measurement_enriched,
     datasource_predicted_data,
     measurement_predicted) = get_config_parameter(config)

    logger.info("Fetched relevant data...")

    try:
        start = convert_time(timeframe[0])
        end = convert_time(timeframe[1])
    except Exception as e:
        raise ex.InvalidConfigValueException(str(e)) from e

    # For 'pred' the base data comes from the predicted measurement,
    # otherwise from the raw measurement.
    if selected_event == 'pred':
        base_source, base_measurement = (datasource_predicted_data,
                                         measurement_predicted)
    else:
        base_source, base_measurement = datasource_raw_data, measurement_raw

    try:
        df_query = read_manager.read_query(
            datasource_enriched_data,
            f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
        )
        df_raw = read_manager.read_query(
            base_source,
            f"SELECT * FROM {base_measurement} WHERE time >= {start}ms AND time <= {end}ms"
        )

        df_raw.dropna(inplace=True)

        # Remove the last and the first remaining row (by index label).
        # NOTE(review): the reason for trimming the boundaries is not visible here.
        df_raw = df_raw.drop(df_raw.index[-1])
        df_raw = df_raw.drop(df_raw.index[0])
    except Exception as e:
        raise ex.DBException("Exception in read_manager") from e

    # The NaN-free feature frame is identical for every event, so build it once.
    try:
        features = df_query.dropna()
    except IndexError as e:
        raise ex.DBException('Wrong query') from e

    for event in events:
        model = model_persistor.load_classifier(config, event)
        logger.info("Model geladen...")

        classified_data_df = features.copy()
        try:
            classified_data_df[event] = model.predict(features)
            logger.info("Daten klassifiziert...")
        except sklearn.exceptions.NotFittedError as e:
            raise ex.SklearnException("Classifier not fitted") from e
        except ValueError as e:
            # The message is fixed; the chained cause carries the real error.
            raise ex.SklearnException(
                "Input contains NaN, infinity or a value too large for dtype('float64')"
            ) from e

        # Column assignment aligns the predictions with df_raw by index.
        df_raw[event] = classified_data_df[event]

    # Reset these columns to 0 for all rows whose hour lies strictly between 8 and 22.
    for column in ('warmwasseraufbereitung', 'warmwasseraufbereitung_pred'):
        if column in df_raw.columns:
            hours = df_raw.index.hour
            df_raw.loc[(hours > 8) & (hours < 22), column] = 0

    target_measurement = (selected_event if selected_event == 'pred' else
                          measurement_classified)
    try:
        write_manager.write_dataframe(datasource_classified_data, df_raw,
                                      target_measurement)
        logger.info("Daten persistiert. Klassifizierung abgeschlossen")
    except Exception as e:
        raise ex.DBException("Exception in write_manager") from e

    return 0