Beispiel #1
0
def test_return_str():
    """Smoke test that `mylogging.return_str` output works as an exception message and is truthy."""
    mylogging.config.COLOR = 1

    try:
        formatted = mylogging.return_str("asdas", caption="User")
        raise Exception(formatted)
    except Exception:
        # Raised on purpose - it only has to be possible to build the exception from the message.
        pass

    returned = mylogging.return_str("asdas", caption="User")
    assert returned
Beispiel #2
0
def lnu_core(
    data: tuple[np.ndarray, np.ndarray],
    learning_rate: float,
    epochs: int,
    normalize_learning_rate: bool,
    early_stopping: bool = True,
    learning_rate_decay: float = 0.8,
    damping: int | float = 1,
    return_all: bool = False,
) -> np.ndarray | tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    X = data[0]
    y_hat = data[1]

    y = np.zeros(len(y_hat))
    error = np.zeros(len(y_hat)) if return_all else np.zeros(1)
    w = np.zeros(X.shape[1])
    last_running_error = np.inf

    w_all = np.zeros((X.shape[1], X.shape[0])) if return_all else None

    for epoch in range(epochs):

        running_error = np.zeros(1)

        for j in range(X.shape[0]):

            current_index = j if return_all else 0

            y[j] = np.dot(w, X[j])
            if y[j] > y_hat.max() * 10e6:
                raise RuntimeError(mylogging.return_str("Model is unstable"))

            error[current_index] = y_hat[j] - y[j]
            running_error[0] = running_error[0] + abs(error[current_index])

            dydw = X[j]
            if normalize_learning_rate:
                minorm = learning_rate / (damping + np.dot(X[j], X[j].T))
                dw = minorm * error[current_index] * dydw
            else:
                dw = learning_rate * error[current_index] * dydw
            w = w + dw

            if return_all:
                w_all[:, j] = w

        if (early_stopping and epoch > 1) and (
            sum(np.abs(dw)) / len(w) < 10e-8 or ((running_error[0] / len(y_hat)) - last_running_error) < 10e-5
        ):
            break

        last_running_error = running_error[0] / len(y_hat)

        if learning_rate_decay:
            learning_rate = learning_rate * learning_rate_decay

    if return_all:
        return w, w_all, y, error
    else:
        return w
Beispiel #3
0
def get_inputs(
    input: tuple[np.ndarray, np.ndarray] | Sequences
) -> tuple[np.ndarray, np.ndarray]:
    """Split model data into the input part and the output part.

    Args:
        input (tuple[np.ndarray, np.ndarray] | Sequences): `Sequences` instance or a tuple of exactly
            two items - input vector and output vector.

    Returns:
        tuple[np.ndarray, np.ndarray]: First and second item of `input`.

    Raises:
        TypeError: If `input` is neither `Sequences` instance nor a tuple.
        ValueError: If `input` is a tuple, but its length is not 2.
    """
    if not isinstance(input, Sequences):
        # Only plain data are validated. Sequences instances are unpacked as they are.
        if not isinstance(input, tuple):
            raise TypeError(
                mylogging.return_str("Data must be tuple of length 2 - input vector and output vector.")
            )

        if len(input) != 2:
            raise ValueError(
                mylogging.return_str("Data must be tuple of length 2 - input vector and output vector.")
            )

    model_inputs, model_outputs = input[0], input[1]
    return model_inputs, model_outputs
Beispiel #4
0
def get_optimizers_loses_activations():
    """Create tensorflow (keras) optimizers with predefined parameters.

    According to the original documentation it's used by the optimize function.

    Returns:
        list: Optimizer instances in this order - SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam.

    Raises:
        ModuleNotFoundError: If tensorflow library is not installed.
    """
    tensorflow_spec = importlib.util.find_spec("tensorflow")

    if not tensorflow_spec:
        raise ModuleNotFoundError(
            mylogging.return_str(
                "Tensorflow model configured, but tensorflow library not installed. It's not "
                "in general requirements, because very big and not work everywhere. If you "
                "want to use tensorflow model, install it via \n\n`pip install tensorflow`"
            )
        )

    # Imported lazily, because tensorflow is only an optional dependency
    import tensorflow as tf

    keras_optimizers = tf.keras.optimizers

    return [
        keras_optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True),
        keras_optimizers.RMSprop(learning_rate=0.001, rho=0.9, epsilon=None, decay=0.0),
        keras_optimizers.Adagrad(learning_rate=0.01, epsilon=None, decay=0.0),
        keras_optimizers.Adadelta(learning_rate=1.0, rho=0.95, epsilon=None, decay=0.0),
        keras_optimizers.Adam(
            learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False
        ),
        keras_optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0),
        keras_optimizers.Nadam(
            learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004
        ),
    ]
Beispiel #5
0
def get_eeg(n=1000):
    """Download real EEG data with the wfdb library.

    Args:
        n (int, optional): Length of data. It's passed to wfdb as `sampto`. Defaults to 1000.

    Returns:
        np.ndarray: The `p_signal` attribute of the loaded record (channel 1 of record 'a103l').

    Raises:
        ModuleNotFoundError: If wfdb library is not installed.
    """
    wfdb_spec = importlib.util.find_spec("wfdb")

    if not wfdb_spec:
        raise ModuleNotFoundError(
            mylogging.return_str(
                "For parsing EEG signal, wfdb library is necessary. Install with `pip install wfdb`"
            )
        )

    # Imported lazily, because wfdb is only an optional dependency
    import wfdb

    record = wfdb.rdrecord("a103l", pn_dir="challenge-2015/training/", channels=[1], sampto=n)

    return record.p_signal
Beispiel #6
0
def train(
    data: tuple[np.ndarray, np.ndarray],
    layers: Literal["lstm", "mlp"] | list[tuple[str, dict]] = "mlp",
    epochs: int = 100,
    load_trained_model: bool = True,
    update_trained_model: bool = True,
    save_model: bool = True,
    saved_model_path_string: str = "stored_models",
    optimizer: str = "adam",
    loss: str = "mse",
    summary: bool = False,
    verbose=0,
    used_metrics="accuracy",
    timedistributed=False,
    batch_size=64,
):
    """Tensorflow model. Neural nets - LSTM or MLP (dense layers). Layers are customizable with arguments.

    Args:
        data (tuple[np.ndarray, np.ndarray]): Tuple (X, y) of input train vectors X and train outputs y.
            `y.shape[1]` is used as number of units of the output layer.
        layers (Literal["lstm", "mlp"] | list[tuple[str, dict]], optional): List of tuples of layer name
            (e.g. 'lstm') and layer params dict e.g. (("lstm", {"units": 7, "activation": "relu"})).
            Supported names are 'dense', 'lstm', 'mlp', 'gru', 'conv2d', 'rnn', 'convlstm2d', 'dropout'
            and 'batchnormalization'. There are also some predefined architectures. You can use 'lstm'
            or 'mlp'. Passed params dicts are not modified. Defaults to 'mlp'.
        epochs (int, optional): Number of epochs to evaluate. Defaults to 100.
        load_trained_model (bool, optional): If True, load model from disk instead of building a new one.
            Defaults to True.
        update_trained_model (bool, optional): If model is loaded, whether it's updated (fitted) with
            new input. Defaults to True.
        save_model (bool, optional): If True, save model on disk on saved_model_path_string.
            Defaults to True.
        saved_model_path_string (str, optional): Full path to saved model with name. E.g.
            '/home/dan/mymodel.h5'. If 'stored_models', then file 'stored_models/tensorflow.h5' next
            to this module is used. Defaults to 'stored_models'.
        optimizer (str, optional): Used optimizer. Defaults to 'adam'.
        loss (str, optional): Loss function. Defaults to 'mse'.
        summary (bool, optional): Display model details table. Defaults to False.
        verbose (int, optional): Verbosity passed to the model `fit`. Defaults to 0.
        used_metrics (str, optional): Used metrics. 'accuracy' or 'mape'. Defaults to 'accuracy'.
        timedistributed (bool, optional): Whether to wrap the output dense layer into time distributed
            layer. Defaults to False.
        batch_size (int, optional): Used batch size. Defaults to 64.

    Returns:
        model: Trained model object. Attributes `layers_0_0` (name of the first configured layer),
        `X_ndim` (number of dimensions of the original X) and `y_shape_1` are added to it.

    Raises:
        ModuleNotFoundError: If tensorflow library is not installed.
        ValueError: If `used_metrics` or predefined `layers` name is unknown or if data for dense first
            layer have more than 2 dimensions.
        NameError: If `load_trained_model` is used, but the model cannot be loaded.
    """

    if not importlib.util.find_spec("tensorflow"):
        raise ModuleNotFoundError(
            mylogging.return_str(
                "Tensorflow model configured, but tensorflow library not installed. It's not "
                "in general requirements, because very big and not work everywhere. If you "
                "want to use tensorflow model, install it via \n\n`pip install tensorflow`"
            )
        )

    # Imported lazily, because tensorflow is only an optional dependency
    from tensorflow.keras import Sequential
    from tensorflow.keras import layers as tf_layers
    from tensorflow.keras import metrics as tf_metrics
    from tensorflow.keras import models as tf_models
    from tensorflow.keras import Model as tf_model_type

    X, y = get_inputs(data)

    # Stored before X is eventually reshaped for recurrent layers
    X_ndim = X.ndim

    models = {
        "dense": tf_layers.Dense,
        "lstm": tf_layers.LSTM,
        "mlp": tf_layers.Dense,
        "gru": tf_layers.GRU,
        "conv2d": tf_layers.Conv2D,
        "rnn": tf_layers.SimpleRNN,
        "convlstm2d": tf_layers.ConvLSTM2D,
        "dropout": tf_layers.Dropout,
        "batchnormalization": tf_layers.BatchNormalization,
    }

    if used_metrics == "accuracy":
        metrics = [tf_metrics.Accuracy()]
    elif used_metrics == "mape":
        metrics = [tf_metrics.MeanAbsolutePercentageError()]
    else:
        raise ValueError("metrics has to be one from ['accuracy', 'mape']")

    if saved_model_path_string == "stored_models":
        saved_model_path_string = str(Path(__file__).resolve().parent / "stored_models" / "tensorflow.h5")

    # Predefined architectures are resolved before the model is loaded or built, so the first layer
    # name is known in both cases. Otherwise `layers[0][0]` would be just the first letter of the string.
    if isinstance(layers, str):
        if layers == "lstm":
            layers = [
                ("lstm", {"units": 32, "activation": "relu", "return_sequences": 1}),
                ("dropout", {"rate": 0.1}),
                ("lstm", {"units": 7, "activation": "relu"}),
            ]

        elif layers == "mlp":
            layers = [
                ("dense", {"units": 32, "activation": "relu"}),
                ("dropout", {"rate": 0.1}),
                ("dense", {"units": 7, "activation": "relu"}),
            ]

        else:
            raise ValueError(
                mylogging.return_str("Only possible predefined layers are 'lstm' and 'mlp'.")
            )

    else:
        # Params are copied, so 'input_shape' added later is not written into dicts owned by the caller
        layers = [(layer[0], dict(layer[1])) for layer in layers]

    first_layer_name = layers[0][0]

    # LSTM needs 3D input. Done for loaded model as well, so it's updated with the same data shape
    # as it was trained with.
    if first_layer_name == "lstm" and X.ndim == 2:
        X = X.reshape(X.shape[0], X.shape[1], 1)

    if load_trained_model:
        try:
            model = tf_models.load_model(saved_model_path_string)
            model = cast(tf_model_type, model)
            model.load_weights(saved_model_path_string)

        except Exception as err:
            # Original error is chained, so the real reason of failure is visible in the traceback
            raise NameError("Model is not saved, first save_model = 1 in config") from err

        if update_trained_model:
            model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=verbose)

    else:

        if first_layer_name == "lstm":
            layers[0][1]["input_shape"] = (X.shape[1], X.shape[2])

        elif first_layer_name == "dense":
            if X.ndim > 2:
                raise ValueError(
                    mylogging.return_str(
                        "For dense first layer only univariate data supported (e.g. shape = (n_samples, n_features))"
                        "if ndim > 2: serialize first."
                    )
                )
            layers[0][1]["input_shape"] = (X.shape[1],)

        model = Sequential()

        for layer_name, layer_params in layers:
            model.add(models[layer_name](**layer_params))

        if timedistributed == 1:
            model.add(tf_layers.TimeDistributed(tf_layers.Dense(y.shape[1])))
        else:
            model.add(tf_layers.Dense(y.shape[1]))

        model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

        if summary:
            model.summary()

        model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=verbose)

    if save_model == 1:
        model.save(saved_model_path_string)

    # Extra attributes stored on the model for later use outside of this function
    model.layers_0_0 = first_layer_name

    model.X_ndim = X_ndim
    model.y_shape_1 = y.shape[1]

    return model
Beispiel #7
0
def predict_multiple_columns(
    data=None,
    predicted_columns: list | tuple | str | None = None,
    freqs: list | tuple | str | None = None,
    config: predictit.configuration.Config | dict | None = None,
    **kwargs,
) -> predictit._result_classes.Multiple:
    """Predict multiple columns and multiple frequencies at once. The `predict` function is called for
    every combination of column and frequency.

    Only data and predicted_columns can be positional.

    Check README or tests for working examples.

    Args:
        data (np.ndarray, pd.DataFrame): Time series. Can be 2-D - more columns.
            !!! In Numpy array use data series as rows, but in dataframe use cols !!!. If None, value from
            config is used. Defaults to None.
        predicted_columns (list | tuple | str | None, optional): List of indexes of predicted columns or
            their names (dataframe). Resulting config value has to be a non-empty list. Defaults to None.
        freqs (list | tuple | str | None, optional): If date index available, resample data and predict in
            defined time frequency. If None, then value from config will be used. Defaults to None.
        config (predictit.configuration.Config | dict | None, optional): Settings as Config instance or
            dictionary. Check class for what you can use. If None, then default config will be used.
            Config is copied, so passed object is not changed. Defaults to None.
        **kwargs (dict, optional): Other config values. Check configuration.py for parameters details.

    Returns:
        predictit._result_classes.Multiple: Object created from `best_predictions_dataframes` (one
        dataframe of best predictions for every frequency) and `results` (result of every successful
        `predict` call).

    Raises:
        TypeError: If predicted_columns config value is empty or it's not a list.
    """

    # Work only with a copy, so default config or config of the caller stays untouched
    if config is None or isinstance(config, dict):
        user_settings = config
        config = config_default.copy()
        if user_settings:
            config.update(user_settings)

    elif isinstance(config, predictit.configuration.Config):
        config = config.copy()

    # Values passed as function arguments overwrite values from config
    if data is not None:
        config.data = data

    if predicted_columns is not None:
        config.predicted_columns = predicted_columns

    if freqs is not None:
        config.freqs = freqs

    if not (config.predicted_columns and isinstance(config.predicted_columns, list)):
        raise TypeError(
            mylogging.return_str("predict_multiple function need predicted_columns config value to be list.")
        )

    config.update(kwargs)

    predictit._helpers.logger_init_from_config(config.output.logger_subconfig)

    freqs = config.data_input.freqs if config.data_input.freqs else ["Default frequency"]

    if config.predicted_columns in ["*", ["*"]]:
        # Wildcard means all the columns - data has to be loaded to find out what columns are there

        if isinstance(config.data, str):
            config.data = mdp.load_data.load_data(
                config.data,
                header=config.header,
                csv_style=config.csv_style,
                predicted_table=config.predicted_table,
                max_imported_length=config.max_imported_length,
                request_datatype_suffix=config.request_datatype_suffix,
                data_orientation=config.data_orientation,
            )

        config.predicted_columns = mdp.preprocessing.data_consolidation(config.data).columns

    results = {}
    best_predictions_dataframes = {}

    # Frequency is part of the result name only if there is more than one
    only_one_freq = len(freqs) == 1

    for freq in freqs:

        freq_best_predictions = pd.DataFrame()

        for column in config.predicted_columns:

            config.predicted_column = column
            config.freq = freq

            if only_one_freq:
                result_name = f"Column: {column}"
            else:
                result_name = f"Column: {column} - Freq: {freq}"

            try:
                results[result_name] = predict(config=config)
                freq_best_predictions[column] = results[result_name].best_prediction

            except Exception:
                # Failure on one column is only logged, other columns are still evaluated
                mylogging.traceback(
                    f"Error in making predictions on column {column} and freq {freq}",
                    level="ERROR",
                )

        best_predictions_dataframes[f"Freq: {freq}"] = freq_best_predictions

    return predictit._result_classes.Multiple(
        best_predictions_dataframes=best_predictions_dataframes, results=results
    )
Beispiel #8
0
def predict(
    data=None,
    predicted_column: None | str | int = None,
    config: predictit.configuration.Config | dict | None = None,
    **kwargs,
) -> predictit._result_classes.Result:
    """Make predictions mostly on time-series data. Data input and other config options can be set up in
    configuration.py or overwritten on the fly. Setup can be also done as function input arguments or as command line
    arguments (it will overwrite config values).

    There are working examples in main readme and also in test_it module.

    Function can be configured from with config from configuration, with command line arguments as wel as with
    function parameters. There are only two possible positional parameters - `data` and `predicted_column`. Rest of
    parameters must be named parameters. Params are not documented here, because all config params works here in
    function passed as kwargs.

    Args:
        data (np.ndarray, pd.DataFrame, str): Time series. Can be 2-D - more columns. Can be numpy array, DataFrame,
            path to file or url.
            Examples: "/home/user/my.json", or "https://yoururl/your.csv" or np.random.randn(100, 2).
        predicted_column (None | str | int, optional): Index of predicted column or it's name (dataframe).
            If list with more values only the first one will be evaluated (use predict_multiple_columns function
            if you need that. Default to None.
        config (predictit.configuration.Config | dict | None, optional): Settings as Config instance or dictionary.
            Check class for what you can use. If None, then default config will be used. Defaults to None.
        **kwargs (dict, optional): There is much more parameters' of predict function. Check configuration.py
            for parameters details.

    Returns:
        Depend on 'return_type' config value - return best prediction {np.ndarray}, all models results {np.ndarray},
        detailed results{dict} or interactive plot or print tables of results

    """

    from mypythontools.plots import plot

    if config is None or isinstance(config, dict):
        update_config = config
        config = config_default
        config = config.copy()
        if update_config:
            config.update(update_config)

    elif isinstance(config, predictit.configuration.Config):
        config = config.copy()

    if config.use_config_preset and config.use_config_preset != "none":
        updated_config = config.presets[config.use_config_preset]
        config.update(updated_config)

    # Edit configuration.py default values with arguments values if exist
    if data is not None:
        config.data = data

    if predicted_column is not None:
        config.predicted_column = predicted_column

    config.update(kwargs)

    predictit._helpers.logger_init_from_config(config.output.logger_subconfig)

    # Do not repeat actually mean evaluate once
    if not config.repeatit:
        config.repeatit = 1

    _GUI = GLOBAL_VARS.GUI

    # Add everything printed + warnings to variable to be able to print in GUI
    if _GUI:
        stdout = sys.stdout
        sys.stdout = io.StringIO()

    # Don't want to define in gui condition, so if not gui, do nothing
    if _GUI:

        def update_gui(content, html_id):
            try:
                predictit.gui_start.edit_gui_py(content, html_id)
            except (Exception, ):
                pass

    else:

        def update_gui(content, html_id):
            pass

    # Definition of the table for spent time on code parts
    time_df = []

    def update_time_table(time_last):
        time_df.append([progress_phase, round((time.time() - time_last), 3)])
        return time.time()

    time_point = time_begin = time.time()

    ###############
    ### ANCHOR ### Data
    #############

    progress_phase = "Data loading and preprocessing"
    update_gui(progress_phase, "progress_phase")

    data = mdp.load_data.load_data(
        config.data,
        header=config.header,
        csv_style=config.csv_style,
        predicted_table=config.predicted_table,
        max_imported_length=config.max_imported_length,
        request_datatype_suffix=config.request_datatype_suffix,
        data_orientation=config.data_orientation,
    )

    ###############
    ### ANCHOR ### Data consolidation
    #############

    if not config.predicted_column:
        config.predicted_column = 0

    data_for_predictions_df = mdp.preprocessing.data_consolidation(
        data,
        predicted_column=config.predicted_column,
        other_columns=config.other_columns,
        datalength=config.datalength,
        datetime_column=config.datetime_column,
        unique_threshold=config.unique_threshold,
        embedding=config.embedding,
        freq=config.freq,
        resample_function=config.resample_function,
        remove_nans_threshold=config.remove_nans_threshold,
        remove_nans_or_replace=config.remove_nans_or_replace,
        dtype=config.dtype,
    )

    # In data consolidation predicted column was replaced on index 0 as first column
    predicted_column_index = 0
    predicted_column_name = data_for_predictions_df.columns[0]

    ###############
    ### ANCHOR ### Analyze original data
    #############

    column_for_predictions_series = data_for_predictions_df.iloc[:, 0:1]
    results = {}
    data_inputs = []

    if config.mode == "validate":
        column_for_predictions_series = column_for_predictions_series.iloc[:
                                                                           -config
                                                                           .
                                                                           output
                                                                           .
                                                                           predicts, :]
        config.repeatit = 1

    for i in config.used_models:
        data_inputs.append(config.models_input[i])
    data_inputs = set(data_inputs)

    if config.analyzeit == 1 or config.analyzeit == 3:
        print("Analyze of unprocessed data")
        try:
            predictit.analyze.analyze_column(data_for_predictions_df.values[:,
                                                                            0],
                                             window=30)
            predictit.analyze.analyze_data(data_for_predictions_df)
            predictit.analyze.decompose(
                data_for_predictions_df.values[:, 0],
                **config.analyze_seasonal_decompose,
            )
        except Exception:
            mylogging.traceback("Analyze failed", level="ERROR")

    semaphor = None

    if config.multiprocessing:

        multiprocessing.freeze_support()

        if not config.processes_limit:
            config.processes_limit = multiprocessing.cpu_count() - 1

        if config.multiprocessing == "process":
            pipes = []
            semaphor = multiprocessing.Semaphore(config.processes_limit)

        elif config.multiprocessing == "pool":
            pool = multiprocessing.Pool(config.processes_limit)

            # It is not possible easy share data in multiprocessing, so results are resulted via callback function
            def return_result(result):
                for i, j in result.items():
                    results[i] = j

    ### Optimization loop

    if (not config.optimization or not config.optimization_variable
            or not config.optimization_values
            or len(config.optimization_values) == 1):
        config.variable_optimization.optimization = False
        config.optimization_values = ["Not optimized"]
        config.optimization_variable = None

    time_point = update_time_table(time_point)
    progress_phase = "Predict"
    update_gui(progress_phase, "progress_phase")

    models_indexed = {i: j for i, j in enumerate(config.used_models)}

    ###############
    ### ANCHOR ### Main loop
    #############

    for optimization_index, optimization_value in enumerate(
            config.optimization_values):

        # TODO check why setattr - may be wrong after config change
        if config.optimization_variable:
            setattr(config, config.optimization_variable, optimization_value)

        ###############
        ### ANCHOR ### Feature extraction
        #############

        if config.add_fft_columns:
            data_for_predictions_df = mdp.feature_engineering.add_frequency_columns(
                data_for_predictions_df,
                window=config.feature_engineering.add_fft_columns,
            )

        if config.data_extension:
            data_for_predictions_df = mdp.feature_engineering.add_derived_columns(
                data_for_predictions_df,
                **config.feature_engineering.data_extension)

            ###############
            ### ANCHOR ### Feature selection
            #############

            # data_for_predictions_df TODO

            ###############
            ### ANCHOR ### Data preprocessing
            #############

        if config.mode == "validate":
            test_unstandardized = mdp.misc.split(
                data_for_predictions_df, predicts=config.predicts)[1].values
            models_test_outputs_unstandardized = [test_unstandardized]

        else:
            models_test_outputs_unstandardized = mdp.create_model_inputs.create_tests_outputs(
                data_for_predictions_df.values[:, 0],
                predicts=config.predicts,
                repeatit=config.repeatit,
            )

        data_for_predictions, last_undiff_value, final_scaler = mdp.preprocessing.preprocess_data(
            data_for_predictions_df.values,
            remove_outliers=config.remove_outliers,
            smoothit=config.smoothit,
            correlation_threshold=config.correlation_threshold,
            data_transform=config.data_transform,
            standardizeit=config.standardizeit,
            bins=config.bins,
            binning_type=config.binning_type,
        )

        data_for_predictions = cast(np.ndarray, data_for_predictions)

        if config.mode == "validate":
            data_for_predictions, test = mdp.misc.split(
                data_for_predictions, predicts=config.predicts)
            models_test_outputs = [test]

        else:
            models_test_outputs = mdp.create_model_inputs.create_tests_outputs(
                data_for_predictions[:, 0],
                predicts=config.predicts,
                repeatit=config.repeatit,
            )

        column_for_predictions_processed = data_for_predictions[:,
                                                                predicted_column_index]

        data_shape = np.shape(data_for_predictions)
        data_length = len(column_for_predictions_processed)

        data_std = np.std(column_for_predictions_processed[-30:])
        data_mean = np.mean(column_for_predictions_processed[-30:])
        data_abs_max = max(
            abs(column_for_predictions_processed.min()),
            abs(column_for_predictions_processed.max()),
        )

        multicolumn = 0 if data_shape[1] == 1 else 1

        if (config.analyzeit == 2 or config.analyzeit == 3
            ) and optimization_index == len(config.optimization_values) - 1:

            print("\n\nAnalyze of preprocessed data\n")
            try:
                predictit.analyze.analyze_column(
                    column_for_predictions_processed, window=30)
                predictit.analyze.analyze_data(data_for_predictions)
                predictit.analyze.decompose(
                    column_for_predictions_processed,
                    **config.analyze_seasonal_decompose,
                )

            except Exception:
                mylogging.traceback("Analyze failed", level="ERROR")

        min_data_length = 3 * config.predicts + config.default_n_steps_in

        if (data_length < min_data_length or data_length <
                config.repeatit + config.default_n_steps_in + config.predicts):
            config.repeatit = 1
            min_data_length = 3 * config.predicts + config.default_n_steps_in

        assert min_data_length < data_length, mylogging.return_str(
            "Set up less predicted values in settings or add more data",
            caption="To few data",
        )

        for data_inputs_name in data_inputs:
            try:
                (
                    model_train_input,
                    model_predict_input,
                    model_test_inputs,
                ) = mdp.create_model_inputs.create_inputs(
                    data_for_predictions,
                    input_type_name=data_inputs_name,
                    input_type_params=config.data_inputs[data_inputs_name],
                    mode=config.mode,
                    predicts=config.predicts,
                    repeatit=config.repeatit,
                    predicted_column_index=predicted_column_index,
                )

            except Exception:
                mylogging.traceback(
                    f"Error in creating input type: {data_inputs_name} with option optimization: {optimization_value}",
                    level="WARNING",
                )
                continue

            for (iterated_model_index,
                 iterated_model_name) in models_indexed.items():
                iterated_model = predictit.models.models_assignment[
                    iterated_model_name]

                if config.models_input[
                        iterated_model_name] == data_inputs_name:

                    predict_parameters = {
                        "config": config.get_dict(),
                        # Functions to not import all modules
                        "preprocess_data_inverse":
                        mdp.preprocessing.preprocess_data_inverse,
                        "fitted_power_transform":
                        mdp.preprocessing.fitted_power_transform,
                        # Other
                        "iterated_model_train": iterated_model.train,
                        "iterated_model_predict": iterated_model.predict,
                        "iterated_model_name": iterated_model_name,
                        "iterated_model_index": iterated_model_index,
                        "optimization_index": optimization_index,
                        "optimization_value": optimization_value,
                        "model_train_input": model_train_input,
                        "model_predict_input": model_predict_input,
                        "model_test_inputs": model_test_inputs,
                        "models_test_outputs": models_test_outputs,
                        "models_test_outputs_unstandardized":
                        models_test_outputs_unstandardized,
                        "data_abs_max": data_abs_max,
                        "data_mean": data_mean,
                        "data_std": data_std,
                        "last_undiff_value": last_undiff_value,
                        "final_scaler": final_scaler,
                        "semaphor": semaphor,
                    }

                    if config.models_input[iterated_model_name] in [
                            "one_step",
                            "one_step_constant",
                    ]:
                        if multicolumn and config.predicts > 1:
                            mylogging.warn(
                                f"Warning in model {iterated_model_name} \n\nOne-step prediction on "
                                "multivariate data (more columns). Use multi_step (y lengt equals to predict) "
                                "or do use some one column data input in config models_input or predict just one value."
                            )
                            continue

                    if config.multiprocessing == "process":

                        pipes.append(multiprocessing.Pipe(duplex=False))
                        p = multiprocessing.Process(
                            target=predictit._main_loop.train_and_predict,
                            kwargs={
                                **predict_parameters,
                                **{
                                    "pipe": pipes[-1][1]
                                }
                            },
                        )

                        p.Daemon = True  # Baby process will be terminated if parent killed
                        p.start()

                    elif config.multiprocessing == "pool":

                        pool.apply_async(
                            predictit._main_loop.train_and_predict,
                            (),
                            predict_parameters,
                            callback=return_result,
                        )

                    else:
                        results = {
                            **results,
                            **predictit._main_loop.train_and_predict(**predict_parameters),
                        }

    if config.multiprocessing:
        if config.multiprocessing == "process":
            for i in pipes:
                try:
                    results = {**results, **i[0].recv()}
                except Exception:
                    pass

        if config.multiprocessing == "pool":
            pool.close()
            pool.join()

        for i in results.values():
            mylogging.my_logger.log_and_warn_from_lists(
                i["logs_list"], i["warnings_list"])

    # Create confidence intervals
    if config.confidence_interval:
        try:
            lower_bound, upper_bound = predictit.misc.confidence_interval(
                column_for_predictions_series.values,
                predicts=config.predicts,
                confidence=config.confidence_interval,
            )

            grey_area = ["Lower bound", "Upper bound"]
            bounds = True
        except Exception:
            bounds = False
            grey_area = ["Lower bound", "Upper bound"]
            mylogging.traceback("Error in compute confidence interval",
                                level="ERROR")

    else:
        bounds = False
        grey_area = False

    ###############
    ### ANCHOR ### Results processing
    #############

    # Criterion is the best of average from repetitions
    time_point = update_time_table(time_point)
    progress_phase = "Evaluation"
    update_gui(progress_phase, "progress_phase")

    # Two kind of results we will create. Both as dataframe
    #   - First are all the details around prediction. Model errors, time, memory peak etc.
    #   - Second we have predicted values

    # Results such as trained model etc. that cannot be displayed in dataframe are in original results dict.

    # Convert results from dictionary to dataframe - exclude objects like trained model

    results_df = pd.DataFrame.from_dict(results, orient="index")

    if results_df.empty:
        raise RuntimeError(
            mylogging.return_str(
                "None of models finished predictions. Set config.logger_level = 'DEBUG' for more info.",
                caption="All models failed for some reason",
            ))

    evaluated_matrix = np.zeros(
        (1, len(config.optimization_values), len(config.used_models)))
    evaluated_matrix.fill(np.nan)

    for k in results.values():
        evaluated_matrix[0, k["Index"][0], k["Index"][1]] = k["Model error"]

    (
        _,
        best_models_optimized_values,
        optimized_values_results_df,
        best_model_name,
        best_optimized_value,
    ) = predictit.analyze.analyze_results(
        evaluated_matrix,
        config.optimization_values,
        config.models.used_models,
        config.prediction.error_criterion,
    )

    # Generate date indexes for result predictions
    last_date = column_for_predictions_series.index[-1]

    if isinstance(
            last_date,
        (pd.core.indexes.datetimes.DatetimeIndex,
         pd._libs.tslibs.timestamps.Timestamp),
    ):
        date_index = pd.date_range(
            start=last_date,
            periods=config.predicts + 1,
            freq=column_for_predictions_series.index.freq,
        )[1:]
        date_index = pd.to_datetime(date_index)

    else:
        date_index = list(range(last_date + 1,
                                last_date + config.predicts + 1))

    predictions_df = pd.DataFrame(index=date_index)

    results_df.sort_values("Model error", inplace=True)

    for i, row in results_df.iterrows():
        predictions_df[i] = row["Results"]

    if predictions_df.empty:
        raise RuntimeError(
            mylogging.return_str(
                "Neither of models finished prediction. Set config.logger_level = 'DEBUG' for more info."
            ))

    if config.variable_optimization.optimization:
        best_optimized_values_dict = {
            j: best_models_optimized_values[i]
            for i, j in enumerate(config.used_models)
        }

        best_indexes = []

        for i, row in results_df.iterrows():
            if row["Optimization value"] == best_optimized_values_dict[
                    row["Name"]]:
                best_indexes.append(i)

        optimization_result = predictit._result_classes.Optimization(
            optimized_variable=config.variable_optimization.
            optimization_variable,
            optimized_options=config.variable_optimization.optimization_values,
            best_value=best_optimized_value,
            values_results_df=optimized_values_results_df,
            best_values_for_models=best_optimized_values_dict,
            all_models_results_df=results_df,
            all_models_predictions_df=predictions_df,
        )

        predictions_df = predictions_df[best_indexes]
        predictions_df.columns = [
            results_df.loc[i]["Name"] for i in predictions_df.columns
        ]
        results_df = results_df.loc[best_indexes]

        results_df.rename(columns={"A": "Col_1"}, inplace=True)

    else:
        optimization_result = None

    if config.hyperparameter_optimization.optimizeit:
        hyperparameter_optimization_kwargs = results_df[
            "Best optimized parameters"].to_dict()
    else:
        hyperparameter_optimization_kwargs = None

    results_df.set_index("Name", inplace=True)

    results_to_drop = [
        i for i in [
            "Index",
            "Trained model",
            "Test errors",
            "Results",
            "logs_list",
            "warnings_list",
        ] if i in results_df.columns
    ]
    results_df.drop(columns=results_to_drop, inplace=True)

    best_model_predicts = predictions_df[best_model_name]

    ###############
    ### ANCHOR ### Plot
    #############

    if config.variable_optimization.optimization and config.variable_optimization.plot_all_optimized_models:
        predictions_for_plot = optimization_result.all_models_predictions_df.copy(
        )
    else:
        predictions_for_plot = predictions_df.copy()

    predictions_for_plot.columns = [
        f"{i + 1} - {j}" for i, j in enumerate(predictions_for_plot.columns)
    ]

    if config.mode == "validate":
        best_model_name_plot = "Test"
        predictions_df.insert(0, "Test", test_unstandardized)
        predictions_for_plot.insert(0, "Test", test_unstandardized)

    else:
        best_model_name_plot = predictions_for_plot.columns[0]

    bounds_df = pd.DataFrame(index=date_index)

    if bounds:
        bounds_df["Upper bound"] = upper_bound
        bounds_df["Lower bound"] = lower_bound

    last_value = float(column_for_predictions_series.iloc[-1, 0])

    predictions_for_plot_limited = pd.concat(
        [
            predictions_for_plot.iloc[:, :config.plot_number_of_models],
            bounds_df
        ],
        axis=1,
    )

    predictions_with_history = pd.concat(
        [
            column_for_predictions_series[-config.plot_history_length:],
            predictions_for_plot_limited,
        ],
        sort=False,
    )
    predictions_with_history.iloc[-config.predicts - 1, :] = last_value

    if config.sort_results_by == "name":
        results_df.sort_index(key=lambda x: x.str.lower(), inplace=True)
        predictions_df.sort_index(key=lambda x: x.str.lower(), inplace=True)

    if config.general.analyzeit:
        import matplotlib.pyplot as plt

        plt.show()

    time_point = update_time_table(time_point)
    progress_phase = "plot"
    update_gui(progress_phase, "progress_phase")

    if config.output.plot_subconfig.show_plot or config.output.plot_subconfig.save_plot:

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            return_div = True if _GUI else False

            if config.plot_type == "with_history":
                div = plot(
                    predictions_with_history,
                    plot_library=config.plot_library,
                    plot_name=config.plot_name,
                    legend=config.plot_legend,
                    highlighted_column=predicted_column_name,
                    surrounded_column=best_model_name_plot,
                    grey_area=grey_area,
                    save=config.save_plot,
                    return_div=return_div,
                    show=config.output.plot_subconfig.show_plot,
                )

            elif config.plot_type == "just_results":
                div = plot(
                    predictions_for_plot,
                    plot_library=config.plot_library,
                    legend=config.plot_legend,
                    highlighted_column=best_model_name_plot,
                    save=config.save_plot,
                    show=config.output.plot_subconfig.show_plot,
                )

    update_time_table(time_point)
    progress_phase = "Completed"
    update_gui(progress_phase, "progress_phase")

    ###############
    ### ANCHOR ### Table
    #############

    time_df.append(["Complete time", round((time.time() - time_begin), 3)])
    time_df = pd.DataFrame(time_df, columns=["Part", "Time"])

    simple_table_df = mdp.misc.edit_table_to_printable(results_df[[
        "Model error"
    ]].iloc[:config.print_number_of_models, :].reset_index())

    detailed_table_df = results_df.iloc[:config.
                                        print_number_of_models, :].reset_index(
                                        )
    detailed_table_df.drop(["Unstandardized model error"],
                           axis=1,
                           inplace=True)
    detailed_table_df = mdp.misc.edit_table_to_printable(detailed_table_df)

    tables = predictit._result_classes.Tables(
        simple=tabulate(
            simple_table_df.values,
            headers=["Model", f"Average {config.error_criterion} error"],
            **config.table_settings,
        ),
        detailed=tabulate(
            detailed_table_df.values,
            headers=detailed_table_df.columns,
            **config.table_settings,
        ),
        time=tabulate(time_df.values,
                      headers=time_df.columns,
                      **config.table_settings),
        simple_table_df=simple_table_df,
        detailed_table_df=detailed_table_df,
    )

    ###############
    ### ANCHOR ### Results
    #############

    misc_result = predictit._result_classes.Misc(
        evaluated_matrix=evaluated_matrix)
    result = predictit._result_classes.Result(
        best_prediction=best_model_predicts,
        best_model_name=best_model_name,
        predictions=predictions_df,
        results_df=results_df,
        results=results,
        with_history=predictions_with_history,
        tables=tables,
        config=config,
        misc=misc_result,
        optimization=optimization_result,
        hyperparameter_optimization_kwargs=hyperparameter_optimization_kwargs,
    )

    ###############
    ### ANCHOR ### Print
    #############

    if config.print_result_details:
        print((
            f"\nBest model is {best_model_name} with results \n\n{best_model_predicts}\n\nWith model error {config.error_criterion} = "
            f"{results_df.loc[best_model_name, 'Model error']}"))

    if config.print_table == "simple":
        print(f"\n{tables.simple}\n")

    elif config.print_table == "detailed":
        print(f"\n{tables.detailed}\n")

    if config.print_time_table:
        print(f"\n{tables.time}\n")

    ###############
    ### ANCHOR ### Return
    #############

    mylogging.reset_outer_warnings_filter()

    # Return stdout and stop collect warnings and printed output
    if _GUI:
        output = sys.stdout.getvalue()
        sys.stdout = stdout
        result.output = output
        print(output)

    if _GUI:
        result.plot = div

    if config.return_internal_results:
        return {
            "data_for_predictions (X, y)": data_for_predictions,
            "model_train_input": model_train_input,
            "model_predict_input": model_predict_input,
            "model_test_inputs": model_test_inputs,
            "models_test_outputs": models_test_outputs,
        }

    return result
def train(
    data: tuple[np.ndarray, np.ndarray],
    model="BayesianRidge",
    n_estimators=100,
    alpha=0.0001,
    alpha_1=1.0e-6,
    alpha_2=1.0e-6,
    lambda_1=1.0e-6,
    lambda_2=1.0e-6,
    n_iter=300,
    epsilon=1.35,
    alphas=[0.1, 0.5, 1],
    gcv_mode="auto",
    solver="auto",
    n_hidden=20,
    rbf_width=0,
    activation_func="selu",
) -> Any:
    """Create and fit a scikit-learn model (or an extreme learning machine from sklearn extensions).

    The model can be passed as a class name, as a class, or as an already created instance.

    Note:
        There are many parameters in function, but all models use just a few of them. Only parameters
        that the chosen model reports in `get_params()` are applied, the rest is ignored.
        Usually default parameters are just enough.

        Some of models are regressors and some are classifiers. If it's classifier, it's optimal
        to have data sorted in limited number of bins.

    Args:
        data (tuple[np.ndarray, np.ndarray]): Tuple (X, y) of input train vectors X and train outputs y.
            Insert input with no constant column - added by default in sklearn.
            Check `mydatapreprocessing` how to generate output.
        model ((str, object), optional): Model that will be used. You can insert model itself or
            just a name of used class. All possible options below in docs. Defaults to 'BayesianRidge'.
        n_estimators (int, optional): Parameter of some model. Defaults to 100.
        alpha (float, optional): Parameter of some model. Defaults to 0.0001.
        alpha_1 (float, optional): Parameter of some model. Defaults to 1.e-6.
        alpha_2 (float, optional): Parameter of some model. Defaults to 1.e-6.
        lambda_1 (float, optional): Parameter of some model. Defaults to 1.e-6.
        lambda_2 (float, optional): Parameter of some model. Defaults to 1.e-6.
        n_iter (int, optional): Parameter of some model. Defaults to 300.
        epsilon (float, optional): Parameter of some model. Defaults to 1.35.
        alphas (list, optional): Parameter of some model. Defaults to [0.1, 0.5, 1].
        gcv_mode (str, optional): Parameter of some model. Defaults to 'auto'.
        solver (str, optional): Parameter of some model. Defaults to 'auto'.
        n_hidden (int, optional): Parameter of some model. Defaults to 20.
        rbf_width (int, optional): Parameter of some model. Defaults to 0.
        activation_func (str, optional): Parameter of some model. Defaults to 'selu'.

    Returns:
        Any: Fitted model. It gets extra attribute `output_shape` with value 'one_step' (y has one column)
        or 'multi_step' (model is then wrapped in a multi output wrapper).

    Raises:
        AttributeError: If model is defined by a string that was found neither in the searched sklearn
            modules, nor in the sklearn extensions.

    Options if string::

        ['PLSRegression', 'RandomForestRegressor', 'ExtraTreesRegressor', 'BaggingRegressor',
        'GradientBoostingRegressor', 'AdaBoostRegressor', 'VotingRegressor', 'StackingRegressor',
        'RandomForestClassifier', 'ExtraTreesClassifier', 'BaggingClassifier', 'GradientBoostingClassifier',
        'AdaBoostClassifier', 'VotingClassifier', 'StackingClassifier', 'GaussianProcessRegressor',
        'GaussianProcessClassifier', 'IsotonicRegression', 'HuberRegressor', 'LinearRegression',
        'LogisticRegression', 'LogisticRegressionCV', 'PassiveAggressiveRegressor', 'SGDRegressor',
        'TheilSenRegressor', 'RANSACRegressor', 'PoissonRegressor', 'GammaRegressor', 'TweedieRegressor',
        'PassiveAggressiveClassifier', 'RidgeClassifier', 'RidgeClassifierCV', 'SGDClassifier', 'OneVsRestClassifier',
        'OneVsOneClassifier', 'OutputCodeClassifier', 'MultiOutputRegressor', 'RegressorChain',
        'MultiOutputClassifier', 'ClassifierChain', 'KNeighborsRegressor', 'RadiusNeighborsRegressor',
        'KNeighborsClassifier', 'RadiusNeighborsClassifier', 'MLPRegressor', 'MLPClassifier',
        'SelfTrainingClassifier', 'DecisionTreeRegressor', 'ExtraTreeRegressor', 'DecisionTreeClassifier',
        'ExtraTreeClassifier', 'TransformedTargetRegressor', 'BayesianRidge', 'ElasticNet', 'Hinge', 'Lars', 'LarsCV',
        'Lasso', 'LassoCV', 'LassoLarsIC', 'Log', 'ModifiedHuber', 'MultiTaskElasticNet', 'MultiTaskLasso',
        'MultiTaskLassoCV', 'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV', 'Perceptron', 'Ridge',
        'RidgeCV', 'SquaredLoss', 'SVR',
        # Sklearn extensions
        'ELMClassifier', 'ELMRegressor', 'GenELMClassifier', 'GenELMRegressor']
    """
    from sklearn import (
        multioutput,
        linear_model,
        ensemble,
        tree,
        neighbors,
        gaussian_process,
    )
    from sklearn.base import is_classifier, is_regressor

    X, y = get_inputs(data)

    # If string like 'LinearRegression', find class with such a name
    if isinstance(model, str):
        model_name = model

        for module in (linear_model, ensemble, tree, neighbors, gaussian_process):
            if model_name in module.__all__:
                model = getattr(module, model_name)
                break
        else:
            # Not found in sklearn, it may be from sklearn extensions library. Missing library still
            # ends with ImportError, so user knows what to install.
            import sklearn_extensions.extreme_learning_machines.elm as elm

            try:
                model = getattr(elm, model_name)
            except AttributeError as err:
                # Model defined by string not found anywhere
                raise AttributeError(
                    mylogging.return_str(
                        "You defined model that was not found in sklearn. You can use not only string, but also "
                        "object or class itself. You can use function `get_all_models` to get list of all "
                        "possible models and then use one of them."
                    )
                ) from err

    # If class, but no object was configured, create instance
    if callable(model):
        model = model()

    params = {
        "n_estimators": n_estimators,
        "alpha": alpha,
        "alpha_1": alpha_1,
        "alpha_2": alpha_2,
        "lambda_1": lambda_1,
        "lambda_2": lambda_2,
        "n_iter": n_iter,
        "epsilon": epsilon,
        # Copy, so the shared default list is never stored on the model and changed through it
        "alphas": list(alphas) if isinstance(alphas, list) else alphas,
        "gcv_mode": gcv_mode,
        "solver": solver,
        "n_hidden": n_hidden,
        "rbf_width": rbf_width,
        "activation_func": activation_func,
    }

    # Params, that are configured in function params as well as configurable in models
    model_params = model.get_params()
    used_params = {name: value for name, value in params.items() if name in model_params}

    model.set_params(**used_params)

    if y.ndim == 1 or y.shape[1] == 1:
        # One output column - sklearn expects flat y in this case
        y = y.ravel()
        output_shape = "one_step"

    else:
        # More output columns - one model is fitted for every predicted step
        if is_regressor(model):
            model = multioutput.MultiOutputRegressor(model)
        elif is_classifier(model):
            model = multioutput.MultiOutputClassifier(model)

        output_shape = "multi_step"

    # Set after possible wrapping, so the attribute is on the returned object
    setattr(model, "output_shape", output_shape)

    model.fit(X, y)

    return model
Beispiel #10
0
    analyze,
    best_params,
    configuration as _configuration,
    evaluate_predictions,
    gui_start,
    _helpers,
    main,
    _main_loop,
    misc,
    models,
)

# Just shortcuts to avoid importing from main
from .main import (
    predict,
    predict_multiple_columns,
    compare_models,
    find_optimal_input_for_models,
)

from .configuration import config

import sys

import mylogging

# Refuse to continue on interpreters older than Python 3.7 (tuple comparison covers major and minor)
if sys.version_info < (3, 7):
    raise RuntimeError(
        mylogging.return_str("Python version >= 3.7 necessary.")
    )
def compare_predicted_to_test(
    predicted: np.ndarray,
    test: np.ndarray,
    error_criterion: str = "mape",
    plot: bool = False,
    model_name: str = "Model",
    data_name: str = "Data",
) -> float:
    """Compare tested model with reality.

    Args:
        predicted (np.ndarray): Model output.
        test (np.ndarray): Correct values or output from data_pre funcs.
        error_criterion (str, optional): One of 'mse' (or 'mse_sklearn'), 'max_error', 'rmse', 'mape'
            or 'dtw'. Defaults to 'mape'.
        plot (bool, optional): Whether create plot. Defaults to False.
        model_name (str, optional): Model name for plot. Defaults to "Model".
        data_name (str, optional): Data name for plot. Defaults to "Data".

    Returns:
        float: Error criterion value. `np.nan` if there are no predicted values or if predicted and test
        lengths differ. If configured, plot of results is shown as well.

    Raises:
        ImportError: If 'dtw' criterion is used and library dtaidistance is not installed.
        KeyError: If `error_criterion` is not one of supported options.
    """
    # Has to be checked before len() is used, otherwise None ends with TypeError
    if predicted is None:
        print("No predicted values to compare")
        return np.nan

    # Arrays are used for all computations, so lists and other sequences work for every criterion
    predicted_values = np.asarray(predicted)
    test_values = np.asarray(test)

    predicts = len(predicted_values)

    if predicts != len(test_values):
        print("Test and predicted length not equal")
        return np.nan

    if plot:

        if not misc.GLOBAL_VARS.PLOTS_CONFIGURED:
            misc.setup_plots()

        import matplotlib.pyplot as plt

        plt.figure(figsize=(10, 6))
        plt.plot(test, label="Reality")
        plt.plot(predicted, label="Prediction")
        plt.legend(loc="upper right")
        plt.xlabel("t")
        plt.ylabel("Predicted value")
        plt.title("Prediction with \n {} with data {}".format(
            model_name, data_name))
        plt.show()

    if error_criterion == "mse" or error_criterion == "mse_sklearn":
        from sklearn.metrics import mean_squared_error

        criterion_value = mean_squared_error(test_values, predicted_values)

    elif error_criterion == "max_error":
        from sklearn.metrics import max_error

        criterion_value = max_error(test_values, predicted_values)

    elif error_criterion == "rmse":
        error = predicted_values - test_values
        criterion_value = np.sqrt(np.sum(error**2, axis=0) / predicts)

    elif error_criterion == "mape":
        # Values with absolute value under 1 are replaced by 1 to avoid division by (almost) zero
        no_zero_test = np.where(np.abs(test_values) >= 1, test_values, 1)
        criterion_value = np.mean(np.abs(
            (test_values - predicted_values) / no_zero_test)) * 100

    elif error_criterion == "dtw":

        if not importlib.util.find_spec("dtaidistance"):
            raise ImportError(
                mylogging.return_str(
                    "Library dtaidistance necessary for configured dtw (dynamic time warping) "
                    "error criterion is not installed! Install it via \n\npip install dtaidistance"
                ))

        from dtaidistance import dtw

        criterion_value = dtw.distance_fast(predicted_values.astype("double"),
                                            test_values.astype("double"))

    else:
        raise KeyError(
            mylogging.return_str(
                f"bad 'error_criterion' in config - '{error_criterion}'. Use some from options from config "
                "comment... "))

    return criterion_value
Beispiel #12
0
def analyze_column(data: np.ndarray | pd.DataFrame,
                   lags: int = 5,
                   window: int = 5) -> None:
    """Analyze one-dimensional data (predicted column).

    Print basic details like length, minimum, maximum, mean, std and a few values, plot the data and
    their distribution, plot autocorrelation and partial autocorrelation (good for ARIMA models), plot
    rolling mean and rolling std and print whether data are probably stationary or not.

    Args:
        data (np.ndarray | pd.DataFrame): Time series data.
        lags (int, optional): Lags used for autocorrelation. Defaults to 5.
        window (int, optional): Window for rolling average and rolling std. Defaults to 5.

    Raises:
        ValueError: If data have more than one dimension with more than one value.
    """
    if not misc.GLOBAL_VARS.PLOTS_CONFIGURED:
        misc.setup_plots()

    import matplotlib.pyplot as plt
    import seaborn as sns
    from statsmodels.graphics.tsaplots import plot_acf
    from statsmodels.graphics.tsaplots import plot_pacf
    from statsmodels.tsa.stattools import adfuller

    import mydatapreprocessing

    data = np.array(data)

    if data.ndim != 1 and 1 not in data.shape:
        raise ValueError(
            mylogging.return_str(
                "Select column you want to analyze",
                caption="analyze_data function only for one-dimensional data!",
            ))

    data = data.ravel()

    # Positive index is used, because negative start with stop equal to 0 would result in empty slice
    middle_start = len(data) // 2

    print(
        f"Length: {len(data)}\n"
        f"Minimum: {np.nanmin(data)}\n"
        f"Maximum: {np.nanmax(data)}\n"
        f"Mean: {np.nanmean(data)}\n"
        f"Std: {np.nanstd(data)}\n"
        f"First few values: {data[:5]}\n"
        f"Middle values: {data[middle_start: middle_start + 5]}\n"
        f"Last few values: {data[-5:]}\n"
        f"Number of nan (not a number) values: {np.count_nonzero(np.isnan(data))}\n"
    )

    # Data and it's distribution

    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.plot(data)
    plt.xlabel("t")
    plt.ylabel("f(x)")

    plt.subplot(1, 2, 2)
    sns.histplot(data, bins=100, kde=True, color="skyblue")
    plt.xlabel("f(x)")
    plt.ylabel("Distribution")

    plt.tight_layout()
    plt.suptitle("Data and it's distribution", fontsize=20)
    plt.subplots_adjust(top=0.88)
    plt.draw()

    fig, (ax, ax2) = plt.subplots(ncols=2, figsize=(10, 5))
    fig.suptitle("Repeating patterns - autocorrelation")

    # Failing autocorrelation plots are only logged, so the rest of analysis can continue
    try:

        plot_acf(data, lags=lags, ax=ax)
        ax.set_xlabel("Lag")
        plot_pacf(data, lags=lags, ax=ax2)
        ax2.set_xlabel("Lag")
        plt.draw()

    except Exception:
        mylogging.traceback(
            "Error in analyze_column function - in autocorrelation function: Maybe more lags, than values"
        )

    # Moving average and moving std - both computed from the same windows
    windows = mydatapreprocessing.misc.rolling_windows(data, window)
    rolling_mean = np.mean(windows, 1)
    rolling_std = np.std(windows, 1)

    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.plot(rolling_mean)
    plt.xlabel("t")
    plt.ylabel("Rolling average x")

    plt.subplot(1, 2, 2)
    plt.plot(rolling_std)
    plt.xlabel("t")
    plt.ylabel("Rolling standard deviation x")

    plt.tight_layout()
    plt.suptitle("Rolling average and rolling standard deviation", fontsize=20)
    plt.subplots_adjust(top=0.88)
    plt.draw()

    # Augmented Dickey-Fuller test for stationarity
    pvalue = adfuller(data)[1]
    cutoff = 0.05
    if pvalue < cutoff:
        print(
            f"\np-value = {pvalue} : Analyzed column is probably stationary.\n"
        )
    else:
        print(
            f"\np-value = {pvalue} : Analyzed column is probably not stationary.\n"
        )