def test_datasource_classifier_unpicklable(self):
    """Expect InvalidConfigValueException for the unpicklable_model.txt source."""
    broken_source_config = config.copy()
    broken_source_config.update(
        datasource_classifier="test_models/unpicklable_model.txt")
    model_type = self.current_model_type_std[0]
    self.assertRaises(ex.InvalidConfigValueException, mp.load_classifier,
                      broken_source_config, model_type)
def test_datasource_classifier_is_wrong(self):
    """Expect InvalidConfigValueException when the classifier path is bogus."""
    missing_file_config = config.copy()
    missing_file_config.update(
        datasource_classifier="i_am_not_an_existing_file.yeah")
    model_type = self.current_model_type_std[0]
    self.assertRaises(ex.InvalidConfigValueException, mp.load_classifier,
                      missing_file_config, model_type)
def test_current_model_type_does_not_exist(self):
    """Expect InvalidConfigKeyException for a model type that is not an event."""
    unknown_model_type = "Ich_bin_kein_event"
    cfg = copy.deepcopy(config)
    cfg.update(datasource_classifier="test_models/model_returns_code_0.txt")
    self.assertRaises(ex.InvalidConfigKeyException, mp.load_classifier,
                      cfg, unknown_model_type)
def test_model_at_event_is_empty_returns_KNC(self):
    """Loading from the 'not existing classifier' fixture yields a model.

    The loaded object must be an instance of the same class as ``self.model``.
    """
    test_config = config.copy()
    test_config['datasource_classifier'] = (
        "test_models/model_test_returns_not_existing_classifier.txt")
    tested_model = mp.load_classifier(test_config,
                                      self.current_model_type_std[0])
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(tested_model, type(self.model))
def test_for_create_new_classifier_true(self):
    """load_classifier with its third argument set to True returns a model.

    The config additionally carries ``create_new_classifier = "True"``; the
    returned object must be an instance of the same class as ``self.model``.
    """
    new_classifier_config = copy.deepcopy(config)
    new_classifier_config["create_new_classifier"] = "True"
    created_model = mp.load_classifier(new_classifier_config,
                                       self.current_model_type_std[0], True)
    # assertIsInstance gives a diagnostic message naming the actual type.
    self.assertIsInstance(created_model, type(self.model))
def test_returns_existing_classifier(self):
    """Loading from the 'existing classifier' fixture yields a model.

    The loaded object must be an instance of the same class as ``self.model``.
    """
    classification_config = config.copy()
    classification_config['datasource_classifier'] = (
        "test_models/model_test_returns_existing_classifier.txt")
    loaded_model = mp.load_classifier(classification_config,
                                      self.current_model_type_std[0])
    # assertIsInstance gives a diagnostic message naming the actual type.
    self.assertIsInstance(loaded_model, type(self.model))
def test_new_classifer_method_is_not_empty(self):
    """load_classifier returns a model when 'new_classifier_method' is "kNN".

    The loaded object must be an instance of the same class as ``self.model``.
    """
    test_config = config.copy()
    test_config['datasource_classifier'] = (
        "test_models/model_returns_code_0.txt")
    test_config['new_classifier_method'] = "kNN"
    test_model = mp.load_classifier(test_config,
                                    self.current_model_type_std[0])
    # assertIsInstance gives a diagnostic message naming the actual type.
    self.assertIsInstance(test_model, type(self.model))
def test_for_existing_models_in_dictionary(self):
    """A classifier that was just persisted can be loaded again.

    ``self.model`` is persisted for the second standard model type and then
    loaded back with the same config; the result must be an instance of the
    same class as ``self.model``.
    """
    test_config = config.copy()
    test_config['datasource_classifier'] = (
        "test_models/model_returns_code_0.txt")
    test_config['selected_event'] = "warmwasseraufbereitung"
    model_type = self.current_model_type_std[1]

    mp.persist_classifier(self.model, test_config, model_type)
    existing_model = mp.load_classifier(test_config, model_type)

    # assertIsInstance gives a diagnostic message naming the actual type.
    self.assertIsInstance(existing_model, type(self.model))
def test_returns_not_existing_classifier(self):
    """load_classifier returns something truthy when the stored entry is empty.

    The entry for "abtauzyklus" in the shared fixture file is blanked before
    loading. The original fixture content is restored afterwards so that
    other tests reading the same file do not depend on test order or on
    earlier runs.
    """
    fixture_path = "test_models/model_test_returns_existing_classifier.txt"
    wrong_config = copy.deepcopy(config)
    wrong_config["datasource_classifier"] = fixture_path

    original_dictionary = mp.load_dictionary(fixture_path)
    # Shallow copy: only one key is rebound, the stored models are untouched.
    classifier_dictionary = copy.copy(original_dictionary)
    classifier_dictionary["abtauzyklus"] = ""

    # Registered before the fixture is overwritten so the restore also runs
    # when saving or the assertion below fails.
    self.addCleanup(mp.save_dictionary, original_dictionary, fixture_path)
    mp.save_dictionary(classifier_dictionary, fixture_path)

    self.assertTrue(
        mp.load_classifier(wrong_config, self.current_model_type_std[0]))
def test_model_for_event_is_not_instance_of_sklearn(self):
    """A plain string stored as the model is not returned as a real model.

    A dictionary derived from model_returns_code_0.txt gets a string in place
    of the "abtauzyklus" entry and is saved under a separate path; the object
    loaded from that path must not be an instance of ``type(self.model)``.
    """
    path_to_wrong_dictionary = (
        "test_models/model_returns_code_0_contains_wrong_model_at_abauzyklus.txt")
    classifier_dictionary = mp.load_dictionary(
        "test_models/model_returns_code_0.txt")
    classifier_dictionary['abtauzyklus'] = "Ich bin kein richtiges Model"
    mp.save_dictionary(classifier_dictionary, path_to_wrong_dictionary)

    wrong_config = config.copy()
    wrong_config['datasource_classifier'] = path_to_wrong_dictionary
    test_model = mp.load_classifier(wrong_config,
                                    self.current_model_type_std[0])
    # assertNotIsInstance reports the offending type on failure.
    self.assertNotIsInstance(test_model, type(self.model))
def test_datasource_classifier_none(self):
    """Expect InvalidConfigValueException when the classifier source is None."""
    none_source_config = config.copy()
    none_source_config.update(datasource_classifier=None)
    model_type = self.current_model_type_std[0]
    self.assertRaises(ex.InvalidConfigValueException, mp.load_classifier,
                      none_source_config, model_type)
def test_datasource_classifer_deleted(self):
    """Expect InvalidConfigKeyException when 'datasource_classifier' is absent."""
    stripped_config = copy.deepcopy(config)
    stripped_config.pop('datasource_classifier')
    model_type = self.current_model_type_std[0]
    self.assertRaises(ex.InvalidConfigKeyException, mp.load_classifier,
                      stripped_config, model_type)
def test_wrong_structure_config(self):
    """Expect ConfigTypeException when a plain string is passed as config."""
    not_a_config = "Hallo"
    model_type = self.current_model_type_std[0]
    self.assertRaises(ex.ConfigTypeException, mp.load_classifier,
                      not_a_config, model_type)
def train_classifier(config):
    """Train one classifier per configured event and persist it if it scores well.

    Name in documentation: klassifizierer_trainieren()

    The marked data for the configured time range is queried once. For every
    event, the rows inside that event's marker window are used to fit the
    classifier returned by ``model_persistor.load_classifier``. The fitted
    classifier is persisted only when ``evaluate_classifier`` accepts it.

    :param config: Contains parameters for training the classifier
    :raises InvalidConfigValueException: Raised if the config parameters
        cannot be extracted or the start/end time cannot be converted
    :raises ConfigException: Raised if the queried data does not cover the
        marker window of an event
    :raises PersistorException: Raised if loading the classifier fails
    :raises SklearnException: Raised if splitting the data or fitting fails
    :return int: 0 on success (failures are reported by raising, not by a
        status code)
    """
    try:
        (selected_event, required_score, test_size, datasource_marked_data,
         start_time, end_time, events) = get_config_parameter(config)
    except Exception as e:
        raise exce.InvalidConfigValueException from e
    logger.info("config parameter loaded")

    try:
        start = convert_time(start_time)
        end = convert_time(end_time)
    except Exception as e:
        raise exce.InvalidConfigValueException(str(e)) from e

    df = read_manager.read_query(
        datasource_marked_data,
        f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
    )

    for event in events:
        # markers[event] is read as a mapping: its first key is the start of
        # the training window, its last value is the end.
        end_start = markers[event]
        start_event = list(end_start.keys())[0]
        end_event = list(end_start.values())[-1]

        # NOTE(review): the bounds are compared as strings, which presumably
        # relies on both sides sharing one sortable timestamp format - confirm.
        if (str(df.index[0]) > start_event) or (str(df.index[-1]) < end_event):
            raise exce.ConfigException(
                'time frame of trainingsdata not in selected data frame included'
            )

        # Slice first, then copy: only the window is duplicated and the
        # in-place dropna below works on an independent frame, not on a slice.
        df_copy = df[start_event:end_event].copy()

        try:
            classifier = model_persistor.load_classifier(config, event, True)
        except Exception as e:
            raise exce.PersistorException(str(e)) from e
        logger.info("model loaded")

        df_copy.dropna(inplace=True)
        y = np.array(df_copy[event])

        # Remove every event column and its marker column so that only the
        # remaining columns are used as features.
        for drop_event in events:
            df_copy = df_copy.drop(labels=[drop_event, f"{drop_event}_marker"],
                                   axis=1)
        X = df_copy.to_numpy()

        try:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size)
        except Exception as e:
            raise exce.SklearnException(str(e)) from e

        try:
            classifier = classifier.fit(X_train, y_train)
        except Exception as e:
            raise exce.SklearnException(str(e)) from e
        logger.info("model trained")

        if evaluate_classifier(classifier, required_score[event], X_test,
                               y_test):
            model_persistor.persist_classifier(classifier, config, event)
            logger.info('model persisted')
        else:
            logger.info('score too low, model not persisted')
    return 0
def apply_classifier(config):
    """Classify the configured time range for every event and persist the result.

    Name in documentation: klassifizierer_anwenden()

    The data is enriched first, then the enriched data and the raw (or, for
    ``selected_event == 'pred'``, the predicted) data are queried. For each
    event the loaded classifier predicts on the enriched rows and the
    prediction is added to the raw frame as a column named after the event.
    That frame is then written to the classified-data source.

    :param config: Dictionary from which the parameters will be extracted
    :raises ConfigTypeException: if ``config`` is not a dict
    :raises InvalidConfigValueException: if the timeframe cannot be converted
    :raises DBException: if reading from or writing to the database fails
    :raises SklearnException: if the classifier is not fitted or ``predict``
        raises a ValueError
    :return int: 0 on success (failures are reported by raising, not by a
        status code)
    """
    logger.info("starting classification...")
    if not isinstance(config, dict):
        raise ex.ConfigTypeException(
            "Wrong data structure of configuration: " + str(config))

    trainingsdata_editing_engine.enrich_data(config)
    logger.info("data enriched...")

    # measurement_enriched is unpacked but not used in this function.
    (datasource_enriched_data,
     datasource_classified_data,
     timeframe,
     selected_event,
     measurement_classified,
     datasource_raw_data,
     measurement_raw,
     events,
     measurement_enriched,
     datasource_predicted_data,
     measurement_predicted) = get_config_parameter(config)
    logger.info("Fetched relevant data...")

    try:
        start = convert_time(timeframe[0])
        end = convert_time(timeframe[1])
    except Exception as e:
        raise ex.InvalidConfigValueException(str(e)) from e

    try:
        df_query = read_manager.read_query(
            datasource_enriched_data,
            f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
        )
        if selected_event == 'pred':
            df_raw = read_manager.read_query(
                datasource_predicted_data,
                f"SELECT * FROM {measurement_predicted} WHERE time >= {start}ms AND time <= {end}ms"
            )
        else:
            df_raw = read_manager.read_query(
                datasource_raw_data,
                f"SELECT * FROM {measurement_raw} WHERE time >= {start}ms AND time <= {end}ms"
            )
        df_raw.dropna(inplace=True)
        # Drop the last and then the first remaining row of the raw data.
        df_raw = df_raw.drop(df_raw.index[-1])
        df_raw = df_raw.drop(df_raw.index[0])
    except Exception as e:
        # Chain the cause so the real failure stays visible in the traceback.
        raise ex.DBException("Exception in read_manager") from e

    for event in events:
        df = df_query.copy()
        model = model_persistor.load_classifier(config, event)
        logger.info("Model geladen...")
        try:
            df.dropna(inplace=True)
        except IndexError as e:
            raise ex.DBException('Wrong query') from e

        classified_data_df = df.copy()
        try:
            classified_data_df[event] = model.predict(df)
            logger.info("Daten klassifiziert...")
        except sklearn.exceptions.NotFittedError as e:
            raise ex.SklearnException("Classifier not fitted") from e
        except ValueError as e:
            # The message is fixed; the chained cause carries the real reason.
            raise ex.SklearnException(
                "Input contains NaN, infinity or a value too large for dtype('float64')"
            ) from e
        # Column assignment aligns the prediction to df_raw by index.
        df_raw[event] = classified_data_df[event]

    # Zero these columns for every row whose index hour is strictly between
    # 8 and 22.
    for column in ('warmwasseraufbereitung', 'warmwasseraufbereitung_pred'):
        if column in df_raw.columns:
            hours = df_raw.index.hour
            df_raw.loc[(hours > 8) & (hours < 22), column] = 0

    try:
        if selected_event == 'pred':
            write_manager.write_dataframe(datasource_classified_data, df_raw,
                                          selected_event)
        else:
            write_manager.write_dataframe(datasource_classified_data, df_raw,
                                          measurement_classified)
        logger.info("Daten persistiert. Klassifizierung abgeschlossen")
    except Exception as e:
        raise ex.DBException("Exception in write_manager") from e
    return 0