def imputation(data, threshold="3600S"):
    """
    Locate gaps in the time index of the given data.

    Consecutive index entries are compared; every pair whose distance is
    strictly greater than ``threshold`` is recorded.
    Name in documentation: imputation
    :param data: The data to inspect (pandas Series); only its index is used
    :param threshold: Largest tolerated distance as "<integer><unit letter>", e.g. "3600S"
    :return: pandas DataFrame with the columns "from" and "to"; one row per gap,
             holding the index value right before and right after the gap
    :exception NoDataException: Raised when data is empty
    :exception InvalidConfigValueException: Raised when the numeric part of the threshold is below 1
    """
    if data.empty:
        raise exc.NoDataException("Data mustn't be empty")
    # Everything except the trailing unit letter must be an integer >= 1.
    if int(threshold[:-1]) < 1:
        raise exc.InvalidConfigValueException(
            'Threshold must be greater than 1.')

    stamps = pd.Series(data.index)
    gaps = stamps.diff()
    max_gap = pd.to_timedelta(threshold)

    # Position 0 has no predecessor, so the scan starts at position 1.
    gap_positions = [
        pos for pos in range(1, len(gaps))
        if pd.to_timedelta(gaps.values[pos]) > max_gap
    ]

    gap_frame = pd.DataFrame(columns=["from", "to"])
    gap_frame["from"] = [stamps.get(pos - 1) for pos in gap_positions]
    gap_frame["to"] = [stamps.get(pos) for pos in gap_positions]
    return gap_frame
def convert_time(time_var):
    """Convert a date-time string to a unix timestamp in milliseconds.

    :param time_var: date and time as a string in the format "%Y-%m-%d %H:%M:%S.%f %Z"
    :raises InvalidConfigValueException: Raised if time_var cannot be parsed with that format
    :return int: The converted time as unix timestamp in milliseconds
    """
    try:
        parsed = datetime.strptime(time_var, "%Y-%m-%d %H:%M:%S.%f %Z")
    except Exception as err:
        # Chain the parsing error so the root cause stays visible in tracebacks.
        raise exceptions.InvalidConfigValueException(
            "Timeframe value in Config wrong") from err

    # NOTE(review): time.mktime() interprets the time tuple as local time and
    # the tuple has no sub-second field, so the result is always a whole
    # number of seconds times 1000 - confirm this is what callers expect.
    return int(time.mktime(parsed.timetuple())) * 1000
Exemple #3
0
def convert_time(time_var):
    '''
    Convert a date-time string to a unix timestamp in milliseconds.

    :param time_var: The time as String in the format "%Y-%m-%d %H:%M:%S.%f %Z".
    :raises: InvalidConfigValueException: If the string cannot be parsed or converted.
    :return: the converted time as int (whole seconds multiplied by 1000).
    '''
    try:
        parsed = datetime.strptime(time_var, "%Y-%m-%d %H:%M:%S.%f %Z")
        # NOTE(review): time.mktime() interprets the time tuple as local time
        # and the tuple has no sub-second field - confirm this is intended.
        return int(time.mktime(parsed.timetuple())) * 1000
    except Exception as err:
        # "except Exception" instead of a bare "except" so that
        # KeyboardInterrupt / SystemExit are not turned into config errors.
        raise exe.InvalidConfigValueException("Filtern Config format is not correct. Convert_time() failed", 900) from err
def resample(data, freq='60S'):
    """
    Bring the given data onto a uniform time grid.

    Entries whose index value has already occurred earlier are dropped first;
    the remainder is resampled with ``freq`` and grid points without an
    original value stay empty (``asfreq``).
    Name in documentation: resample
    :param data: given data as pandas Series which should be resampled.
    :param freq: grid spacing as "<integer><unit letter>", e.g. '60S'.
    :return: the resampled data on the uniform time grid.
    :exception NoDataException: Raised when data is empty
    :exception InvalidConfigValueException: Raised when the numeric part of freq is below 1
    """
    if data.empty:
        raise exc.NoDataException("Data mustn't be empty")

    # Everything except the trailing unit letter must be an integer >= 1.
    step = int(freq[:-1])
    if step < 1:
        raise exc.InvalidConfigValueException(
            'Resample frequency cannot be less than 1S')

    repeated = data.index.duplicated()
    first_occurrences = data[~repeated]
    return first_occurrences.resample(freq).asfreq()
def rolling_mean(data, frame_width=100):
    """
    Smooth the data with a centered rolling mean.
    Name in documentation: rolling_mean
    :param data: The data (pandas Series) the rolling mean is applied to
    :param frame_width: The window width used for the rolling mean
    :return: The rolling mean of the data, computed over centered windows
    :exception NoDataException: Raised when data is empty
    :exception InvalidConfigValueException: Raised when frame_width is negative
    """
    if data.empty:
        raise exc.NoDataException("Data mustn't be empty")
    if frame_width < 0:
        raise exc.InvalidConfigValueException(
            'Framewidth cannot be less than 0 ')

    # center=True labels each window by its middle element.
    centered_window = data.rolling(frame_width, center=True)
    return centered_window.mean()
def train_classifier(config):
    """
    Name in documentation: klassifizierer_trainieren()

    Train one classifier per configured event on the marked data and persist
    it when its evaluation passes.

    The marked data is read for the configured time frame. For every event the
    rows between the first key and the last value of ``markers[event]`` are
    used, rows with missing values are dropped, and the event column serves as
    target while all event and marker columns are removed from the features.

    :param config: Configuration handed to get_config_parameter() and to the model persistor
    :raises InvalidConfigValueException: Raised if the config cannot be read or a time value is invalid
    :raises ConfigException: Raised if the marker range is not covered by the loaded data
    :raises PersistorException: Raised if the classifier cannot be loaded
    :raises SklearnException: Raised if splitting the data or fitting the classifier fails
    :return int: 0 when the function runs through; failures are reported via exceptions
    """
    try:
        (selected_event, required_score, test_size, datasource_marked_data,
         start_time, end_time, events) = get_config_parameter(config)
    except Exception:
        raise exce.InvalidConfigValueException
    logger.info("config parameter loaded")

    try:
        start = convert_time(start_time)
        end = convert_time(end_time)
    except Exception as e:
        raise exce.InvalidConfigValueException(str(e))

    # NOTE(review): the query text is assembled from config values by string
    # interpolation.
    query = f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
    df = read_manager.read_query(datasource_marked_data, query)

    for event in events:
        intervals = markers[event]
        first_start = list(intervals.keys())[0]
        last_end = list(intervals.values())[len(intervals) - 1]

        # The bounds are compared as strings against the string form of the
        # first and last index entry.
        starts_too_late = str(df.index[0]) > first_start
        if starts_too_late or (str(df.index[-1]) < last_end):
            raise exce.ConfigException(
                'time frame of trainingsdata not in selected data frame included'
            )
        training_frame = df.copy()[first_start:last_end]

        try:
            classifier = model_persistor.load_classifier(config, event, True)
        except Exception as e:
            raise exce.PersistorException(str(e))
        logger.info("model loaded")

        training_frame.dropna(inplace=True)
        y = np.array(training_frame[event])
        # Every event column and its marker column is a label, not a feature.
        for other_event in events:
            label_columns = [other_event, f"{other_event}_marker"]
            training_frame = training_frame.drop(labels=label_columns, axis=1)
        X = training_frame.to_numpy()

        try:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size)
            classifier = classifier.fit(X_train, y_train)
        except Exception as e:
            raise exce.SklearnException(str(e))
        logger.info("model trained")

        good_enough = evaluate_classifier(classifier, required_score[event],
                                          X_test, y_test)
        if not good_enough:
            logger.info('score too low, model not persisted')
            continue
        model_persistor.persist_classifier(classifier, config, event)
        logger.info('model persisted')
    return 0
Exemple #7
0
def mark_data(config):
    """
    Name in documentation: daten_markieren()
    Mark the data with the occurrences of the configured events.

    For every event a ``<event>_marker`` column is filled with 1.0 at each
    start point and -1.0 at each end point taken from ``markers[event]``, and
    an ``<event>`` column is set to 1.0 between each start/end pair. The
    result is written to the marked-data datasource.

    :param config: relevant parameters provided by the API

    :raises InvalidConfigValueException: Raised if a time value is invalid or the markers do not pair up
    :raises InvalidConfigKeyException: Raised if the config parameters cannot be read
    :raises DBException: Raised if reading or writing the data fails

    :return int: 0 when the function runs through; failures are reported via exceptions
    """
    try:
        (selected_event, datasource_raw_data, measurement_raw, start_time,
         end_time, register_dict, required_registers,
         datasource_enriched_data, datasource_marked_data,
         measurement_enriched, events, datasource_predicted_data,
         measurement_predicted) = get_config_parameter(config)
    except Exception as e:
        raise ex.InvalidConfigKeyException("Key " + str(e) +
                                           " was not found in config")

    try:
        start = convert_time(start_time)
        end = convert_time(end_time)
    except Exception as e:
        raise ex.InvalidConfigValueException(str(e))

    try:
        query = f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
        df = read_manager.read_query(datasource_enriched_data, query)
    except Exception as e:
        raise ex.DBException(str(e))
    logger.info('enriched_data loaded')

    for event in events:
        marker_column = f"{event}_marker"
        df[marker_column] = 0.0  # initialise the marker column of the event
        points_for_event = markers[event]  # start -> end mapping of the event

        for start_point, end_point in points_for_event.items():
            # Only mark pairs whose start and end both lie strictly inside the
            # frame. The bounds are re-read on every pass on purpose: the
            # assignments below may add rows to the frame.
            start_inside = str(df.index[0]) < start_point < str(df.index[-1])
            if start_inside and (
                    str(df.index[0]) < end_point < str(df.index[-1])):
                df.loc[start_point, marker_column] = 1.0  # key is the start point
                df.loc[end_point, marker_column] = -1.0  # value is the end point

        is_start = df[marker_column] == 1.0
        is_end = df[marker_column] == -1.0
        spaces = df.loc[is_start | is_end].index.tolist()

        df[event] = 0.0
        if (len(spaces) % 2) == 1:
            raise ex.InvalidConfigValueException(
                'unable to create correct trainingsdata for this timeframe with given event values'
            )
        # Consecutive marked rows are taken as (begin, finish) pairs.
        for begin, finish in zip(spaces[0::2], spaces[1::2]):
            df.loc[begin:finish, event] = 1.0

    try:
        write_manager.write_dataframe(datasource_marked_data, df,
                                      selected_event)
    except Exception as e:
        raise ex.DBException(str(e))
    logger.info(f"marked_data for {selected_event} successfully persisted")
    return 0
Exemple #8
0
def enrich_data(config):
    """
    Name in documentation: daten_erweitern()
    Enrich the data with derived columns so a classifier can be trained on it
    or applied to it afterwards.

    For every required register four columns are added next to the register
    column: ``_deriv`` (half the difference between the next and the previous
    value), ``_pct_ch`` (percentage change to the previous value), ``_ch_abs``
    (difference to the previous value) and ``_diff`` (value minus the next
    value). The result is written to the enriched-data datasource.

    :param config: Configuration handed to get_config_parameter()
    :raises InvalidConfigValueException: Raised if a time value is invalid or a register column is missing
    :raises InvalidConfigKeyException: Raised if the config parameters cannot be read
    :raises DBException: Raised if loading, preparing or writing the data fails
    :return int: 0 when the function runs through; failures are reported via exceptions
    """
    try:
        (selected_event, datasource_raw_data, measurement_raw, start_time,
         end_time, register_dict, required_registers,
         datasource_enriched_data, datasource_marked_data,
         measurement_enriched, events, datasource_predicted_data,
         measurement_predicted) = get_config_parameter(config)
    except Exception as e:
        raise ex.InvalidConfigKeyException("Key " + str(e) +
                                           " was not found in config")

    try:
        start = convert_time(start_time)
        end = convert_time(end_time)
    except Exception as e:
        raise ex.InvalidConfigValueException(str(e))

    frame_bound = False
    try:
        # Any other selected_event leaves df_query unassigned; the resulting
        # error is reported as DBException by the handler below.
        if selected_event == 'standard':
            df_query = read_manager.read_query(
                datasource_raw_data,
                f"SELECT * FROM {measurement_raw} WHERE time >= {start}ms AND time <= {end}ms"
            )
        elif selected_event == 'pred':
            df_query = read_manager.read_query(
                datasource_predicted_data,
                f"SELECT * FROM {measurement_predicted} WHERE time >= {start}ms AND time <= {end}ms"
            )
        df_query = df_query.astype('float64')
        if 'historic_weatherdata' in df_query.columns:
            df_query = df_query.drop(['historic_weatherdata'], axis=1)
    except Exception as e:
        raise ex.DBException(str(e))
    logger.info('raw_data loaded')

    derived_suffixes = ('', '_deriv', '_pct_ch', '_ch_abs', '_diff')
    for register in required_registers:
        register_column = register_dict[register]
        if register_column not in df_query.columns:
            raise ex.InvalidConfigValueException(
                register_column + ' not found in dataframe columns')

        name = f'{register_column}'
        values = df_query[name]
        df_query[f'{name}_deriv'] = (values.shift(-1) - values.shift(1)) / 2
        df_query[f'{name}_pct_ch'] = values.pct_change(1)
        df_query[f'{name}_ch_abs'] = values.diff(1)
        df_query[f'{name}_diff'] = values - values.shift(-1)

        if not frame_bound:
            # df is bound to the very same object as df_query from here on.
            df = df_query
            frame_bound = True
        else:
            # Because df and df_query are the same object, these assignments
            # write each column back onto itself.
            for suffix in derived_suffixes:
                df[f'{name}{suffix}'] = df_query[f'{name}{suffix}']

    try:
        write_manager.write_dataframe(datasource_enriched_data, df,
                                      selected_event)
    except Exception as e:
        raise ex.DBException(str(e))
    logger.info(
        f'enriched_data for {measurement_enriched} successfully persisted')
    return 0
Exemple #9
0
def apply_classifier(config):
    """
    Name in documentation: klassifizierer_anwenden()

    Mark the occurrences of the configured events in the data by applying the
    persisted classifiers.

    The data is enriched first, then the enriched and the raw (or predicted)
    data are loaded for the configured time frame. For each event the loaded
    model predicts on the enriched rows without missing values and the
    prediction is stored as a column of the raw frame, which is finally
    written to the classified-data datasource.

    :param config: Dictionary from which the parameters will be extracted

    :raises ConfigTypeException: if config is not a dictionary
    :raises InvalidConfigValueException: if a time frame value cannot be converted
    :raises DBException: if reading or writing the data fails
    :raises SklearnException: if the prediction fails

    :return int: 0 when the function runs through; failures are reported via exceptions
    """
    logger.info("starting classification...")
    if not isinstance(config, dict):
        raise ex.ConfigTypeException(
            "Wrong data structure of configuration: " + str(config))

    trainingsdata_editing_engine.enrich_data(config)
    logger.info("data enriched...")

    (datasource_enriched_data, datasource_classified_data, timeframe,
     selected_event, measurement_classified, datasource_raw_data,
     measurement_raw, events, measurement_enriched,
     datasource_predicted_data,
     measurement_predicted) = get_config_parameter(config)

    logger.info("Fetched relevant data...")

    try:
        start = convert_time(timeframe[0])
        end = convert_time(timeframe[1])
    except Exception as e:
        raise ex.InvalidConfigValueException(str(e))

    try:
        df_query = read_manager.read_query(
            datasource_enriched_data,
            f"SELECT * FROM {selected_event} WHERE time >= {start}ms AND time <= {end}ms"
        )

        # 'pred' works on the predicted measurement, everything else on raw.
        if selected_event == 'pred':
            raw_source = datasource_predicted_data
            raw_measurement = measurement_predicted
        else:
            raw_source = datasource_raw_data
            raw_measurement = measurement_raw
        df_raw = read_manager.read_query(
            raw_source,
            f"SELECT * FROM {raw_measurement} WHERE time >= {start}ms AND time <= {end}ms"
        )

        df_raw.dropna(inplace=True)

        # Remove the last and then the first remaining index entry.
        df_raw = df_raw.drop(df_raw.index[-1])
        df_raw = df_raw.drop(df_raw.index[0])
    except Exception:
        raise ex.DBException("Exception in read_manager")

    for event in events:
        features = df_query.copy()
        model = model_persistor.load_classifier(config, event)
        logger.info("Model geladen...")
        try:
            features.dropna(inplace=True)
        except IndexError:
            raise ex.DBException('Wrong query')

        classified_data_df = features.copy()
        try:
            classified_data_df[event] = model.predict(features)
            logger.info("Daten klassifiziert...")
        except sklearn.exceptions.NotFittedError:
            raise ex.SklearnException("Classifier not fitted")
        except ValueError:
            raise ex.SklearnException(
                "Input contains NaN, infinity or a value too large for dtype('float64')"
            )

        df_raw[event] = classified_data_df[event]

    # Reset these columns to 0 for every row whose index hour is 9 to 21.
    for column in ('warmwasseraufbereitung', 'warmwasseraufbereitung_pred'):
        if column in df_raw.columns:
            after_morning = df_raw[column].index.hour > 8
            before_night = df_raw[column].index.hour < 22
            df_raw.loc[after_morning & before_night, column] = 0

    try:
        if selected_event == 'pred':
            target_measurement = selected_event
        else:
            target_measurement = measurement_classified
        write_manager.write_dataframe(datasource_classified_data, df_raw,
                                      target_measurement)
        logger.info("Daten persistiert. Klassifizierung abgeschlossen")
    except Exception:
        raise ex.DBException("Exception in write_manager")

    return 0
Exemple #10
0
def config_validation(filtern_config):
    """
    Name in documentation: 'config_validation'
    Validate the filter config.

    The options selected by ``filtern_config["selected_value"]`` must contain
    every expected curve, every curve must contain every expected cycle, and
    every cycle must carry the keys "delete" and "Interpolation". "delete" has
    to be the string 'True' or 'False'; "Interpolation" has to be 'linear',
    'cubic', 'spline' or 'akima'. Every entry of the time frame has to contain
    a timestamp of the form "YYYY-MM-DD hh:mm:ss.fff UTC".
    Returns nothing; every violation is reported through an exception.

    :raises: ConfigException: For a time frame entry in the wrong format.
    :raises: InvalidConfigKeyException: If the config cannot be read or a cycle lacks a key.
    :raises: InvalidConfigValueException: For wrong values of "delete" or "Interpolation".
    :raises: IncompleteConfigException: For a missing curve or cycle.
    :param filtern_config: The filter config to validate.
    """
    try:
        config = filtern_config["filter_options"][
            filtern_config["selected_value"]]
        timeframe = filtern_config['timeframe']
    except Exception as err:
        # "except Exception" instead of a bare "except" so that
        # KeyboardInterrupt / SystemExit are not reported as config errors.
        raise exe.InvalidConfigKeyException("Can not read Filtern Config.",
                                            900) from err

    missing_curves = [
        'room', 'condenser', 'evaporator', 'inlet', 'outlet', 'freshAirIntake'
    ]
    for curve in config:
        if curve in missing_curves:
            missing_curves.remove(curve)

        # Every curve has to bring the full set of cycles on its own.
        missing_cycles = [
            'warmwasseraufbereitung', 'ofennutzung', 'abtauzyklus',
            'luefterstufen'
        ]
        for cycle in config[curve]:
            options = config[curve][cycle]

            present_keys = list(options)
            if any(key not in present_keys
                   for key in ('delete', 'Interpolation')):
                raise exe.InvalidConfigKeyException(
                    "Filtern Config got no Interpolation or Delete.", 900)

            if cycle in missing_cycles:
                missing_cycles.remove(cycle)

            # The flags are expected as strings, not as booleans.
            if options["delete"] not in ('True', 'False'):
                raise exe.InvalidConfigValueException(
                    "Filtern Config Delete is not True or False.", 900)
            if options["Interpolation"] not in ('linear', 'cubic', 'spline',
                                                'akima'):
                raise exe.InvalidConfigValueException(
                    "Filtern Config Interpolation is not linear, cubic, spline or akima.",
                    900)

        if missing_cycles:
            raise exe.IncompleteConfigException(
                "Filtern Config missing a cycle.", 900)

    if missing_curves:
        raise exe.IncompleteConfigException("Filtern Config missing a curve.",
                                            900)

    # The pattern is searched, not anchored: it may match anywhere in the entry.
    timestamp_pattern = r"[0-9][0-9][0-9][0-9][-][0-1][0-9][-][0-3][0-9][ ][0-2][0-9][:][0-6][0-9][:][0-6][0-9][.][0-9][0-9][0-9][ ][U][T][C]"
    for timestamp in timeframe:
        if not re.search(timestamp_pattern, timestamp):
            raise exe.ConfigException(
                "Filtern Config Timeframe Format is not correct.", 900)