def get_data(from_db, from_measurement, value_name, register, time):
    """
    Retrieve data from database
    Name in documentation: get_data
    :param from_db: database from which the data should be extracted
    :param from_measurement: the measurement from which the data should be extracted
    :param value_name: name of the field in influxdb
    :param register: register of the desired curve which should be cleaned
    :param time: start and end time of the desired period where the curve should be cleaned
    :return: returns the temperature data as a pandas.Series
    """
    query = ""
    if isInt(register):

        query = "SELECT {0} FROM {1} WHERE register='{2}' and time >= {3}ms and time <= {4}ms".format(
            value_name, from_measurement, register, time["from"], time["to"])
    else:
        query = "SELECT {0} FROM {1} WHERE time >={2}ms AND time <={3}ms".format(
            value_name, from_measurement, time["from"], time["to"])

    try:
        res = rm.read_query(from_db, query)
    except KeyError as ke:
        raise exc.NoDataException(
            "Result from database was probably empty."
        ) from ke

    return res
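# Hypothetical usage sketch for get_data. The database, measurement and field names
# below are placeholders and not taken from the original code; only the time keys
# ("from"/"to" in milliseconds) follow the function's actual interface:
#
#   series = get_data(
#       from_db="db_nilan",
#       from_measurement="sensor_data",
#       value_name="temperature",
#       register=42,
#       time={"from": 1579225694062, "to": 1579398691750},
#   )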
def get_current_modbus():
    df = rm.read_query(
        "db_steuerungsparameter",
        "SELECT * FROM steuerungsparameter ORDER BY DESC LIMIT 1")
    print(df.head())
    df.reset_index(drop=True, inplace=True)
    return df.to_dict()
def get_current_models():
    """
    TODO: add to documentation
    Used to retrieve the current model metadata for display in a front-end application.
    :return: The current model metadata as json.
    """
    df = rm.read_query(
        "logs",
        "SELECT short_message FROM model GROUP BY short_message ORDER BY DESC LIMIT 1"
    )["short_message"]
    df.reset_index(drop=True, inplace=True)
    print(df)
    return df.to_dict()[0]
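    # test_adds_all_values stubs InfluxDBClient.query, ResultSet.get_points and
    # pandas.DataFrame via mockito's when2, then verifies that rm.read_query("test",
    # "test_measurement") returns the stubbed row under its nanosecond timestamp index.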
    def test_adds_all_values(self):
        test_result_set = ResultSet({
            "time": 1,
            "message": "success!"
        })
        when2(test_result_set.get_points).thenReturn({
            "time": 1,
            "message": "success!"
        })
        test_dataframe = pandas.DataFrame({'time': [1], 'message': ['success!']})
        when2(InfluxDBClient.query, "test_measurement").thenReturn(test_result_set)
        when2(pandas.DataFrame, {"time": 1, "message": "success!"}).thenReturn(test_dataframe)

        output = rm.read_query("test", "test_measurement")

        self.assertEqual(len(output["message"]), 1)
        self.assertTrue("success!" in output["message"]["1970-01-01 00:00:00.000000001"])
def train_classifier(config):
    """
    Name in documentation: klassifizierer_trainieren()

    Train a classifier to identify a specific event.
    :param config: Contains the parameters for training the classifier; the
        classifier itself (an sklearn object) is loaded via model_persistor
    :raises InvalidConfigValueException: Raised if a value inside of the config is wrong
    :raises PersistorException: Raised if the classifier is not an instance of sklearn
    :return int: Status code that indicates whether the training was successful (0 success, 1 failure)
    """
    try:
        selected_event, required_score, test_size, datasource_marked_data, start_time, end_time, events = get_config_parameter(
            config)
    except Exception:
        raise exce.InvalidConfigValueException
    logger.info("config parameter loaded")
    try:
        start = convert_time(start_time)
        end = convert_time(end_time)
    except Exception as e:
        raise exce.InvalidConfigValueException(str(e))
    df = read_manager.read_query(
        datasource_marked_data,
        f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
    )
    for event in events:
        end_start = markers[event]
        start_event = list(end_start.keys())[0]
        end_event = list(end_start.values())[len(end_start) - 1]
        if (str(df.index[0]) > start_event) or (str(df.index[-1]) < end_event):
            raise exce.ConfigException(
                'time frame of the training data is not included in the selected data frame'
            )
        df_copy = df.copy()[start_event:end_event]

        try:
            classifier = model_persistor.load_classifier(config, event, True)
        except Exception as e:
            raise exce.PersistorException(str(e))
        logger.info("model loaded")
        df_copy.dropna(inplace=True)
        y = np.array(df_copy[event])
        for drop_event in events:
            df_copy = df_copy.drop(labels=[drop_event, f"{drop_event}_marker"],
                                   axis=1)
        X = df_copy.to_numpy()
        try:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size)
        except Exception as e:
            raise exce.SklearnException(str(e))
        try:
            classifier = classifier.fit(X_train, y_train)
        except Exception as e:
            raise exce.SklearnException(str(e))
        logger.info("model trained")
        if evaluate_classifier(classifier, required_score[event], X_test,
                               y_test):
            model_persistor.persist_classifier(classifier, config, event)
            logger.info('model persisted')
        else:
            logger.info('score too low, model not persisted')
    return 0
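# A minimal sketch of the kind of config train_classifier expects. The exact key
# names are defined by get_config_parameter and are not shown here, so every key
# below is an assumption inferred from the unpacked return values:
#
#   example_config = {
#       "selected_event": "standard",
#       "required_score": {"abtauzyklus": 0.9, "warmwasseraufbereitung": 0.9},
#       "test_size": 0.2,
#       "datasource_marked_data": "nilan_marked",
#       "start_time": "2020-01-17T02:28:14Z",
#       "end_time": "2020-01-19T02:31:31Z",
#       "events": ["abtauzyklus", "warmwasseraufbereitung"],
#   }
#   train_classifier(example_config)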
def mark_data(config):
    """
    Name in documentation: daten_markieren()
    Mark the data with the occurrences of the selected event

    :param config: relevant parameters provided by the API

    :raises InvalidConfigValueException: Raised if a value inside of the config is wrong
    :raises InvalidConfigKeyException: Raised if a key in the config does not exist
    :raises DBException: Raised if there are problems with the database connection

    :return int: Status code that indicates whether the marking was successful (0 success, 1 failure)
    """
    try:
        selected_event, datasource_raw_data, measurement_raw, start_time, end_time, register_dict, \
        required_registers, datasource_enriched_data, datasource_marked_data, \
        measurement_enriched, events, datasource_predicted_data, measurement_predicted = get_config_parameter(config)
    except Exception as e:
        raise ex.InvalidConfigKeyException("Key " + str(e) +
                                           " was not found in config")
    try:
        start = convert_time(start_time)
        end = convert_time(end_time)
    except Exception as e:
        raise ex.InvalidConfigValueException(str(e))
    try:
        df = read_manager.read_query(
            datasource_enriched_data,
            f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
        )
    except Exception as e:
        raise ex.DBException(str(e))
    logger.info('enriched_data loaded')
    for event in events:
        df[f"{event}_marker"] = 0.0  # initialise the marker column for this event
        points_for_event = markers[event]  # get the start/end markers for this event
        for key in points_for_event.keys():
            if (str(df.index[0]) < key < str(df.index[-1])) and (
                    str(df.index[0]) < points_for_event[key] < str(
                        df.index[-1])
            ):  # check that both key and value lie within the dataframe's time range
                df.loc[key, f"{event}_marker"] = 1.0  # key is the start point
                df.loc[points_for_event[key],
                       f"{event}_marker"] = -1.0  # value is the end point
        spaces = df.loc[(df[f"{event}_marker"] == 1.0) |
                        (df[f"{event}_marker"] == -1.0)].index.tolist()
        df[event] = 0.0
        if (len(spaces) % 2) == 1:
            raise ex.InvalidConfigValueException(
                'unable to create correct training data for this time frame with the given event values'
            )
        for i in range(0, len(spaces), 2):
            df.loc[spaces[i]:spaces[i + 1], event] = 1.0
    try:
        write_manager.write_dataframe(datasource_marked_data, df,
                                      selected_event)
    except Exception as e:
        raise ex.DBException(str(e))
    logger.info(f"marked_data for {selected_event} successfully persisted")
    return 0
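# mark_data relies on a module-level `markers` mapping that is not shown in this
# excerpt. Based on how it is accessed above, it is assumed to look roughly like:
#
#   markers = {
#       "abtauzyklus": {
#           # start timestamp (marker 1.0) -> end timestamp (marker -1.0)
#           "2020-01-17 05:10:00": "2020-01-17 05:25:00",
#       },
#       "warmwasseraufbereitung": {
#           "2020-01-17 10:00:00": "2020-01-17 11:30:00",
#       },
#   }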
def enrich_data(config):
    """
    Name in documentation: daten_erweitern()
    Enrich the data with additional values so that the classifier can be trained or the data can be classified afterwards

    :param config: relevant parameters provided by the API

    :raises InvalidConfigValueException: Raised if a value inside of the config is wrong
    :raises InvalidConfigKeyException: Raised if a key in the config does not exist
    :raises DBException: Raised if there are problems with the database connection
    :return int: Status code that indicates whether the enriching was successful (0 success, 1 failure)
    """

    try:
        selected_event, datasource_raw_data, measurement_raw, start_time, end_time, register_dict, \
        required_registers, datasource_enriched_data, datasource_marked_data, \
        measurement_enriched, events, datasource_predicted_data, measurement_predicted = get_config_parameter(config)
    except Exception as e:
        raise ex.InvalidConfigKeyException("Key " + str(e) +
                                           " was not found in config")
    try:
        start = convert_time(start_time)
        end = convert_time(end_time)
    except Exception as e:
        raise ex.InvalidConfigValueException(str(e))
    counter = 0
    try:
        if selected_event == 'standard':
            df_query = read_manager.read_query(
                datasource_raw_data,
                f"SELECT * FROM {measurement_raw} WHERE time >= {start}ms AND time <= {end}ms"
            )
        elif selected_event == 'pred':
            df_query = read_manager.read_query(
                datasource_predicted_data,
                f"SELECT * FROM {measurement_predicted} WHERE time >= {start}ms AND time <= {end}ms"
            )
        df_query = df_query.astype('float64')
        if 'historic_weatherdata' in df_query.columns:
            df_query = df_query.drop(['historic_weatherdata'], axis=1)
    except Exception as e:
        raise ex.DBException(str(e))
    logger.info('raw_data loaded')
    for register in required_registers:
        if register_dict[register] not in df_query.columns:
            raise ex.InvalidConfigValueException(
                register_dict[register] + ' not found in dataframe columns')

        df_query[f'{register_dict[register]}_deriv'] = (
            df_query[f'{register_dict[register]}'].shift(-1) -
            (df_query[f'{register_dict[register]}'].shift(1))) / 2
        df_query[f'{register_dict[register]}_pct_ch'] = df_query[
            f'{register_dict[register]}'].pct_change(1)
        df_query[f'{register_dict[register]}_ch_abs'] = df_query[
            f'{register_dict[register]}'].diff(1)
        df_query[f'{register_dict[register]}_diff'] = df_query[f'{register_dict[register]}'] - \
                                                      df_query[f'{register_dict[register]}'].shift(-1)
        if counter == 0:
            df = df_query
            counter += 1
        else:
            df[f'{register_dict[register]}'] = df_query[
                f'{register_dict[register]}']
            df[f'{register_dict[register]}_deriv'] = df_query[
                f'{register_dict[register]}_deriv']
            df[f'{register_dict[register]}_pct_ch'] = df_query[
                f'{register_dict[register]}_pct_ch']
            df[f'{register_dict[register]}_ch_abs'] = df_query[
                f'{register_dict[register]}_ch_abs']
            df[f'{register_dict[register]}_diff'] = df_query[
                f'{register_dict[register]}_diff']
    try:
        write_manager.write_dataframe(datasource_enriched_data, df,
                                      selected_event)
    except Exception as e:
        raise ex.DBException(str(e))
    logger.info(
        f'enriched_data for {measurement_enriched} successfully persisted')
    return 0
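# A minimal, self-contained sketch of the feature engineering enrich_data applies
# per register column (the column name here is hypothetical):
#
#   import pandas as pd
#
#   col = pd.Series([1.0, 2.0, 4.0, 7.0], name="temperatur")
#   deriv = (col.shift(-1) - col.shift(1)) / 2      # centred difference
#   pct_ch = col.pct_change(1)                      # relative change to previous value
#   ch_abs = col.diff(1)                            # absolute change to previous value
#   diff = col - col.shift(-1)                      # difference to the next value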
def apply_classifier(config):
    """
    Name in documentation: klassifizierer_anwenden()

    Marks the occurrences of the selected event in the data with the use of the classifier

    :param config: Dictionary from which the parameters will be extracted

    :raises ConfigTypeException: if the message body contains an invalid config
    :raises InvalidConfigValueException: if a value inside of the config is wrong
    :raises DBException: if there are problems with the database connection
    :raises SKLearnException: if there are problems with sklearn methods

    :return int: Status code that indicates whether the classifying was successful (0 success, 1 failure)
    """

    logger.info("starting classification...")
    if not isinstance(config, dict):
        raise ex.ConfigTypeException(
            "Wrong data structure of configuration: " + str(config))

    trainingsdata_editing_engine.enrich_data(config)
    logger.info("data enriched...")

    datasource_enriched_data, \
        datasource_classified_data, \
        timeframe, selected_event, \
        measurement_classified, \
        datasource_raw_data, \
        measurement_raw, \
        events, \
        measurement_enriched, \
        datasource_predicted_data, \
        measurement_predicted = get_config_parameter(config)

    logger.info("Fetched relevant data...")

    try:
        start = convert_time(timeframe[0])
        end = convert_time(timeframe[1])
    except Exception as e:
        raise ex.InvalidConfigValueException(str(e))

    try:
        df_query = read_manager.read_query(
            datasource_enriched_data,
            f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
        )
        if selected_event == 'pred':
            df_raw = read_manager.read_query(
                datasource_predicted_data,
                f"SELECT * FROM {measurement_predicted} WHERE time >= {start}ms AND time <= {end}ms"
            )
        else:
            df_raw = read_manager.read_query(
                datasource_raw_data,
                f"SELECT * FROM {measurement_raw} WHERE time >= {start}ms AND time <= {end}ms"
            )

        df_raw.dropna(inplace=True)

        df_raw = df_raw.drop(df_raw.index[-1])
        df_raw = df_raw.drop(df_raw.index[0])
    except Exception:
        raise ex.DBException("Exception in read_manager")

    for event in events:
        df = df_query.copy()
        model = model_persistor.load_classifier(config, event)
        logger.info("Model geladen...")
        try:
            df.dropna(inplace=True)
        except IndexError:
            raise ex.DBException('Wrong query')
        classified_data_df = df.copy()
        try:
            classified_data_df[event] = model.predict(df)
            logger.info("Daten klassifiziert...")
        except sklearn.exceptions.NotFittedError:
            raise ex.SklearnException("Classifier not fitted")
        except ValueError:
            raise ex.SklearnException(
                "Input contains NaN, infinity or a value too large for dtype('float64')"
            )

        df_raw[event] = classified_data_df[event]
    if 'warmwasseraufbereitung' in df_raw.columns:
        df_raw.loc[(df_raw['warmwasseraufbereitung'].index.hour > 8) &
                   (df_raw['warmwasseraufbereitung'].index.hour < 22),
                   'warmwasseraufbereitung'] = 0

    if 'warmwasseraufbereitung_pred' in df_raw.columns:
        df_raw.loc[(df_raw['warmwasseraufbereitung_pred'].index.hour > 8) &
                   (df_raw['warmwasseraufbereitung_pred'].index.hour < 22),
                   'warmwasseraufbereitung_pred'] = 0

    try:
        if selected_event == 'pred':
            write_manager.write_dataframe(datasource_classified_data, df_raw,
                                          selected_event)
        else:
            write_manager.write_dataframe(datasource_classified_data, df_raw,
                                          measurement_classified)
        logger.info("Daten persistiert. Klassifizierung abgeschlossen")
    except Exception:
        raise ex.DBException("Exception in write_manager")

    return 0
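# Hypothetical usage sketch: apply_classifier consumes the same kind of config dict
# as the test cases below, which deep-copy a module-level `config` object:
#
#   status = apply_classifier(copy.deepcopy(config))
#   assert status == 0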
class MyTestCase(unittest.TestCase):
    current_model_type_std = config['standard']
    model_dict = mp.load_dictionary('model_classification_engine.txt')
    df_test = rm.read_query(
        'nilan_enriched',
        "SELECT * FROM standard WHERE time >= 1579225694062ms AND time <= 1579398691750ms"
    )
    df_test_pred = rm.read_query(
        'nilan_enriched',
        "SELECT * FROM pred WHERE time >= 1579225694062ms AND time <= 1579398691750ms"
    )
    df_pred_test = rm.read_query(
        'prediction_data',
        "SELECT * FROM vorhergesagteDaten WHERE time >= 1579225694062ms AND time <= 1579398691750ms"
    )

    def setUp(self):
        unstub()

    def test_valid_inputs(self):
        test_config = copy.deepcopy(config)

        when2(training.enrich_data, ANY).thenReturn(0)
        when2(rm.read_query, ANY, ANY).thenReturn(self.df_test)
        when2(mp.load_classifier, ANY,
              'abtauzyklus').thenReturn(self.model_dict['abtauzyklus'])
        when2(mp.load_classifier, ANY, 'warmwasseraufbereitung').thenReturn(
            self.model_dict['warmwasseraufbereitung'])
        when2(mp.persist_classifier, ANY, ANY, ANY)
        when2(wm.write_dataframe, ANY, ANY, ANY)

        self.assertEqual(apply_classifier(test_config), 0)

    def test_for_pred_classification(
            self):  # input shape for pred_classifier???
        test_config = copy.deepcopy(config)
        test_config['selected_event'] = 'pred'

        when2(training.enrich_data, ANY).thenReturn(0)
        when2(rm.read_query, ANY, ANY).thenReturn(self.df_test_pred)
        when2(mp.load_classifier, ANY, 'abtauzyklus_pred').thenReturn(
            self.model_dict['abtauzyklus_pred'])
        when2(mp.load_classifier, ANY,
              'warmwasseraufbereitung_pred').thenReturn(
                  self.model_dict['warmwasseraufbereitung_pred'])
        when2(mp.persist_classifier, ANY, ANY, ANY)
        when2(wm.write_dataframe, ANY, ANY, ANY)

        self.assertEqual(apply_classifier(test_config), 0)

    def test_config_of_wrong_type(self):
        test_config = 'Hallo'

        with self.assertRaises(ex.ConfigTypeException):
            apply_classifier(test_config)

    def test_for_fail_in_connector(self):
        test_config = copy.deepcopy(config)
        test_config['datasource_enriched_data'][
            'database'] = "Ich_bin_keine_database"
        when2(training.enrich_data, ANY).thenReturn(0)
        when2(rm.read_query, "Ich_bin_keine_database",
              ANY).thenRaise(ex.DBException)
        when2(mp.load_classifier, ANY,
              'abtauzyklus').thenReturn(self.model_dict['abtauzyklus'])
        when2(mp.load_classifier, ANY, 'warmwasseraufbereitung').thenReturn(
            self.model_dict['warmwasseraufbereitung'])
        when2(mp.persist_classifier, ANY, ANY, ANY)
        when2(wm.write_dataframe, ANY, ANY, ANY)

        with self.assertRaises(ex.DBException):
            apply_classifier(test_config)

    def test_missing_entry_in_config(self):
        test_config = copy.deepcopy(config)
        del test_config['datasource_enriched_data']

        when2(training.enrich_data, ANY).thenReturn(0)
        when2(rm.read_query, ANY, ANY).thenReturn(self.df_test)
        when2(mp.load_classifier, ANY,
              'abtauzyklus').thenReturn(self.model_dict['abtauzyklus'])
        when2(mp.load_classifier, ANY, 'warmwasseraufbereitung').thenReturn(
            self.model_dict['warmwasseraufbereitung'])
        when2(mp.persist_classifier, ANY, ANY, ANY)
        when2(wm.write_dataframe, ANY, ANY, ANY)

        with self.assertRaises(ex.ConfigException):
            apply_classifier(test_config)

    def test_for_Index_error(self):
        pass

    def test_for_not_fitted_classifier(self):
        test_config = copy.deepcopy(config)

        when2(training.enrich_data, ANY).thenReturn(0)
        when2(rm.read_query, ANY, ANY).thenReturn(self.df_test)
        when2(mp.load_classifier, ANY,
              'abtauzyklus').thenReturn(KNeighborsClassifier())
        when2(mp.load_classifier, ANY, 'warmwasseraufbereitung').thenReturn(
            self.model_dict['warmwasseraufbereitung'])
        when2(mp.persist_classifier, ANY, ANY, ANY)
        when2(wm.write_dataframe, ANY, ANY, ANY)

        with self.assertRaises(ex.SklearnException):
            apply_classifier(test_config)

    def test_for_predicting_input_has_nans(self):
        test_config = copy.deepcopy(config)

        when2(training.enrich_data, ANY).thenReturn(0)
        when2(rm.read_query, ANY, ANY).thenReturn(self.df_test)
        when2(mp.load_classifier, ANY,
              'abtauzyklus').thenReturn(self.model_dict['abtauzyklus'])
        when2(mp.load_classifier, ANY, 'warmwasseraufbereitung').thenReturn(
            self.model_dict['warmwasseraufbereitung'])
        when2(mp.persist_classifier, ANY, ANY, ANY)
        when2(wm.write_dataframe, ANY, ANY, ANY)

        df_nan = self.df_test.copy()
        df_nan.iat[1, 1] = np.NaN
        when2(pd.DataFrame.copy).thenReturn(df_nan)

        with self.assertRaises(ex.SklearnException):
            apply_classifier(test_config)

    def test_writing_to_database_causes_exception(self):
        test_config = copy.deepcopy(config)
        test_config['datasource_classified_data'][
            'measurement'] = "Kein_measurement"

        when2(training.enrich_data, ANY).thenReturn(0)
        when2(rm.read_query, ANY, ANY).thenReturn(self.df_test)
        when2(mp.load_classifier, ANY,
              'abtauzyklus').thenReturn(self.model_dict['abtauzyklus'])
        when2(mp.load_classifier, ANY, 'warmwasseraufbereitung').thenReturn(
            self.model_dict['warmwasseraufbereitung'])
        when2(mp.persist_classifier, ANY, ANY, ANY)
        when2(wm.write_dataframe, ANY, ANY,
              'Kein_measurement').thenRaise(ex.DBException)

        with self.assertRaises(ex.DBException):
            apply_classifier(test_config)