def rms(self,
            s,
            axis=None,
            dtype=None,
            out=None,
            keepdims=np._NoValue,
            initial=np._NoValue):
        r"""
        Root Mean Square One of the most important basic features that can be extracted directly from the time-domain
        signal is the RMS which describe the energy of the signal. It is defined as the square root
        of the average squared value of the signal and can also be called the normalized energy of the
        signal.

        $RMS = \sqrt{\frac{1}{n}\sum_{i=0}^{n-1}s_{i}^{2}}$
        
        Especially in vibration analysis, the RMS is used to perform fault detection, i.e. triggering an
        alarm whenever the RMS surpasses a level that depends on the size of the machine, the nature
        of the signal (for instance velocity or acceleration), the position of the accelerometer, and so on.
        After the existence of a failure has been detected, fault diagnosis is performed relying on more
        sophisticated features. For instance, the ISO 2372 (VDI 2056) norms define three different velocity
        RMS alarm levels for four different machine classes, divided by the power and foundations of the
        rotating machines.
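
        As a rough illustration of this alarm scheme (a minimal sketch, not a method of this
        class; the `rms_alarm` helper and the 0.8 threshold are invented for the example), the
        detection step reduces to comparing the signal RMS against a machine-dependent limit:

        ```python
        import numpy as np

        def rms_alarm(signal, threshold):
            # True when the signal RMS exceeds the alarm threshold
            rms_value = np.sqrt(np.mean(np.asarray(signal) ** 2))
            return rms_value > threshold

        # A synthetic velocity signal around a healthy vibration level
        t = np.linspace(0, 1, 1000)
        velocity = 0.5 * np.sin(2 * np.pi * 50 * t)

        print(rms_alarm(velocity, threshold=0.8))  # False: RMS ~ 0.35
        ```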

        RMS of array elements over a given axis.

        **Parameters**
        
        * **s:** (2d array_like) Elements to get RMS.
        * **axis:** (None or int or tuple of ints, optional) Axis or axes along which an RMS is performed.
        The default, axis=None, will compute the RMS of all the elements of the input array. If axis is negative
        it counts from the last to the first axis. If axis is a tuple of ints, an RMS is performed on all
        of the axes specified in the tuple instead of a single axis or all the axes as before.
        * **dtype:** (dtype, optional) The type of the returned array and of the accumulator in which the
        elements are summed.  The dtype of `s` is used by default unless `s` has an integer 
        dtype of less precision than the default platform integer.  In that case, if `s` is signed 
        then the platform integer is used while if `s` is unsigned then an unsigned integer of the
        same precision as the platform integer is used.
        * **out:** (ndarray, optional) Alternative output array in which to place the result. It must have
        the same shape as the expected output, but the type of the output values will be cast if necessary.
        * **keepdims:** (bool, optional) If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option, the result will broadcast correctly 
        against the input array. If the default value is passed, then `keepdims` will not be passed through
        to the `sum` method of sub-classes of `ndarray`, however any non-default value will be.  If the
        sub-class' method does not implement `keepdims` any exceptions will be raised.
        * **initial:** (scalar, optional) Starting value for the sum.

        **Returns**
        
        * **RMS_along_axis:** (ndarray) An array with the same shape as `s`, with the specified
        axis removed. If `s` is a 0-d array, or if `axis` is None, a scalar is returned.
        If an output array is specified, a reference to `out` is returned.

        ## Snippet code
        
        ```python
        >>> import numpy as np
        >>> from rackio_AI import RackioAIFE
        >>> feature_extraction = RackioAIFE()
        >>> feature_extraction.stats.rms(np.array([0.5, 1.5]))
        1.118033988749895
        >>> feature_extraction.stats.rms(np.array([0.5, 0.7, 0.2, 1.5]), dtype=np.int32)
        0.7071067811865476
        >>> feature_extraction.stats.rms(np.array([[0, 1], [0, 5]]))
        3.605551275463989
        >>> feature_extraction.stats.rms(np.array([[0, 1], [0, 5]]), axis=0)
        array([0.        , 3.60555128])
        >>> feature_extraction.stats.rms(np.array([[0, 1], [0, 5]]), axis=1)
        array([0.70710678, 3.53553391])

        ```
        You can also start the sum with a value other than zero:
        ```python
        >>> feature_extraction.stats.rms(np.array([2, 7, 10]), initial=5)
        7.2571803523590805

        ```
        """
        s = Utils.check_dataset_shape(s)
        # Note: the divisor is the number of rows, s.shape[0], regardless of
        # axis; the documented examples above assume this behavior.
        return (np.sum(s**2,
                       axis=axis,
                       dtype=dtype,
                       out=out,
                       keepdims=keepdims,
                       initial=initial) / s.shape[0])**0.5
    def peak(self, s, ref=None, axis=0, rate=None, **kwargs):
        r"""
        If we consider only the maximum amplitude relative to zero ($s_{ref}=0$) or to a general
        reference level $s_{ref}$, we get the peak value

        $peak = \max\left(s_{i}-ref\right)$

        Often the peak is used in conjunction with other statistical parameters, for instance the
        peak-to-average rate

        $peak\text{-}to\text{-}average = \frac{\max\left(s_{i}-ref\right)}{\frac{1}{N}\sum_{i=0}^{N-1}s_{i}}$

        or the peak-to-median rate, where the denominator is the sample median instead of the mean.
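
        For intuition, the three quantities can be computed directly with numpy (a standalone
        sketch; the sample values are arbitrary):

        ```python
        import numpy as np

        s = np.array([1.0, 3.0, 2.0, 7.0, 4.0])
        ref = 0.0

        peak = np.max(s - ref)                # 7.0
        peak_to_average = peak / np.mean(s)   # 7.0 / 3.4 ~ 2.06
        peak_to_median = peak / np.median(s)  # 7.0 / 3.0 ~ 2.33
        ```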

        **Parameters**

        * **s:** (2d array_like) Input signal.
        * **ref:** (float, optional) Reference level $s_{ref}$. If None, the first sample of each
        column is used as the reference.
        * **axis:** (int, optional) Axis along which the maximum is taken. Default is 0.
        * **rate:** (str, optional) If 'average', returns the peak-to-average rate; if 'median',
        returns the peak-to-median rate. If None, returns the plain peak value.

        **Returns**

        * **peak:** (ndarray) Peak value, or peak rate, along the given axis.

        ## Snippet code

        ```python
        >>> from scipy.stats import norm
        >>> from rackio_AI import RackioAIFE
        >>> feature_extraction = RackioAIFE()
        >>> s = norm.rvs(size=1000, random_state=3)
        >>> feature_extraction.stats.peak(s)
        array([1.91382976])
        >>> s = norm.rvs(size=(1000,2), random_state=3)
        >>> feature_extraction.stats.peak(s)
        array([1.0232499 , 3.26594839])

        ```

        """
        s = Utils.check_dataset_shape(s)

        if ref is not None:
            _peak = np.max(s - ref, axis=axis)
        else:
            # Default reference: the first sample of each column
            _peak = np.max(s - s[0, :], axis=axis)

        if rate is not None:
            if rate.lower() == 'average':
                return _peak / self.mean(s, **kwargs)
            elif rate.lower() == 'median':
                return _peak / self.median(s, **kwargs)

        return _peak
    def skew(self, s, axis=0, bias=True, nan_policy='propagate'):
        r"""
        Compute the sample skewness of a data set.
        For normally distributed data, the skewness should be about zero. For
        unimodal continuous distributions, a skewness value greater than zero means
        that there is more weight in the right tail of the distribution. The
        function `skewtest` can be used to determine if the skewness value
        is close enough to zero, statistically speaking.
        
        **Parameters**

        * **s:** (ndarray) Input array.
        * **axis:** (int or None, optional) Axis along which skewness is calculated. Default is 0.
        If None, compute over the whole array `s`.
        * **bias:** (bool, optional) If False, then the calculations are corrected for statistical bias.
        * **nan_policy:** ({'propagate', 'raise', 'omit'}, optional) Defines how to handle when input contains nan.
        The following options are available (default is 'propagate'); a sketch after this list shows their effect:
            * 'propagate': returns nan
            * 'raise': throws an error
            * 'omit': performs the calculations ignoring nan values
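
        The effect of `nan_policy` can be reproduced with `scipy.stats.skew` directly (a minimal
        sketch, independent of this class):

        ```python
        import numpy as np
        from scipy.stats import skew

        s = np.array([2.0, 8.0, 0.0, np.nan, 1.0])

        print(skew(s, nan_policy='propagate'))  # nan: the NaN contaminates the result
        print(skew(s, nan_policy='omit'))       # skewness of [2., 8., 0., 1.] only
        # skew(s, nan_policy='raise')           # would raise a ValueError
        ```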
        
        **Returns**
        
        * **skewness:** (ndarray) The skewness of values along an axis, returning 0 where all values are equal.
        
        ## Notes

        The sample skewness is computed as the Fisher-Pearson coefficient
        of skewness, i.e.

        $g_1=\frac{m_3}{m_2^{3/2}}$

        where

        $m_i=\frac{1}{N}\sum_{n=1}^N(x[n]-\bar{x})^i$
        
        is the biased sample $i\text{th}$ central moment, and $\bar{x}$ is
        the sample mean. If `bias` is False, the calculations are
        corrected for bias and the value computed is the adjusted
        Fisher-Pearson standardized moment coefficient, i.e.

        $G_1=\frac{k_3}{k_2^{3/2}}=\frac{\sqrt{N(N-1)}}{N-2}\frac{m_3}{m_2^{3/2}}.$
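
        The correction factor can be checked numerically (a quick sketch calling
        `scipy.stats.skew` directly):

        ```python
        import numpy as np
        from scipy.stats import skew

        s = np.array([2, 8, 0, 4, 1, 9, 9, 0])
        N = len(s)

        g1 = skew(s, bias=True)                    # biased coefficient g1
        G1 = np.sqrt(N * (N - 1)) / (N - 2) * g1   # manual bias correction
        print(np.isclose(G1, skew(s, bias=False))) # True
        ```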
        
        ## References
        
        .. [1] Zwillinger, D. and Kokoska, S. (2000). CRC Standard
        Probability and Statistics Tables and Formulae. Chapman & Hall: New
        York. Section 2.2.24.1.
        
        ## Snippet code
        ```python
        >>> import numpy as np
        >>> from rackio_AI import RackioAIFE
        >>> feature_extraction = RackioAIFE()
        >>> s = np.array([1, 2, 3, 4, 5])
        >>> feature_extraction.stats.skew(s)
        array([0.])
        >>> s = np.array([2, 8, 0, 4, 1, 9, 9, 0])
        >>> feature_extraction.stats.skew(s)
        array([0.26505541])
        
        ```
        """
        s = Utils.check_dataset_shape(s)
        return skew(s, axis=axis, bias=bias, nan_policy=nan_policy)
    def std(self,
            s,
            axis=None,
            dtype=None,
            out=None,
            ddof=0,
            keepdims=np._NoValue):
        r"""
        Compute the standard deviation along the specified axis.

        Returns the standard deviation, a measure of the spread of a distribution,
        of the array elements. The standard deviation is computed for the
        flattened array by default, otherwise over the specified axis.
        
        **Parameters**
        
        * **s:** (2d array_like) Calculate the standard deviation of these values.
        * **axis:** (None or int or tuple of ints, optional) Axis or axes along which the standard deviation is computed.
        The default is to compute the standard deviation of the flattened array.
        If this is a tuple of ints, a standard deviation is performed over multiple axes, instead of a single
        axis or all the axes as before.
        * **dtype:** (dtype, optional) Type to use in computing the standard deviation. For arrays of
        integer type the default is float64, for arrays of float types it is the same as the array type.
        * **out:** (ndarray, optional) Alternative output array in which to place the result. It must have
        the same shape as the expected output but the type (of the calculated values) will be cast if necessary.
        * **ddof:** (int, optional) Means Delta Degrees of Freedom.  The divisor used in calculations
        is $N - ddof$, where $N$ represents the number of elements. By default `ddof` is zero.
        * **keepdims:** (bool, optional) If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option, the result will broadcast correctly 
        against the input array. If the default value is passed, then `keepdims` will not be passed through 
        to the `std` method of sub-classes of `ndarray`, however any non-default value will be.  If the
        sub-class' method does not implement `keepdims` any exceptions will be raised.

        **Returns**
        
        * **standard_deviation:** (ndarray) If `out` is None, return a new array containing the standard deviation,
        otherwise return a reference to the output array.

        ## Notes
        
        The standard deviation is the square root of the average of the squared
        deviations from the mean, i.e.

        $\mu = \frac{1}{N}\sum_{i=1}^{N}s_{i}$

        $std = \sqrt{\frac{1}{N}\sum_{i=1}^{N}|s_{i}-\mu|^2}$
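
        Both the formula and the `ddof` divisor can be verified by hand (a minimal numpy sketch):

        ```python
        import numpy as np

        s = np.array([1.0, 2.0, 3.0, 4.0])
        mu = s.mean()

        population_std = np.sqrt(np.mean(np.abs(s - mu) ** 2))            # divisor N
        sample_std = np.sqrt(np.sum(np.abs(s - mu) ** 2) / (len(s) - 1))  # divisor N - 1

        print(np.isclose(population_std, np.std(s)))      # True (ddof=0)
        print(np.isclose(sample_std, np.std(s, ddof=1)))  # True
        ```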
        
        ## Snippet code
        ```python
        >>> import numpy as np
        >>> from rackio_AI import RackioAIFE
        >>> feature_extraction = RackioAIFE()
        >>> s = np.array([[1, 2], [3, 4]])
        >>> feature_extraction.stats.std(s, axis=0)
        array([1., 1.])
        >>> feature_extraction.stats.std(s, axis=1)
        array([0.5, 0.5])

        ```
        
        ### In single precision, std() can be inaccurate

        ```python
        >>> s = np.zeros((2, 512*512), dtype=np.float32)
        >>> s[0, :] = 1.0
        >>> s[1, :] = 0.1
        >>> feature_extraction.stats.std(s)
        0.45000005
        >>> s = np.array([[14, 8, 11, 10], [7, 9, 10, 11], [10, 15, 5, 10]])
        >>> feature_extraction.stats.std(s)
        2.614064523559687

        ```
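        Since the method forwards `dtype` to `np.std`, requesting a float64 accumulator should
        mitigate the accumulation error above (a sketch; the exact digits are platform-dependent):

        ```python
        s = np.zeros((2, 512*512), dtype=np.float32)
        s[0, :] = 1.0
        s[1, :] = 0.1
        feature_extraction.stats.std(s, dtype=np.float64)  # ~ 0.45, accurate
        ```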
        """
        s = Utils.check_dataset_shape(s)
        return np.std(s,
                      axis=axis,
                      dtype=dtype,
                      out=out,
                      ddof=ddof,
                      keepdims=keepdims)
    def load(self,
             pathname: str,
             ext: str = ".tpl",
             reset_index=False,
             **kwargs):
        """
        Load data into DataFrame format:

        * **.tpl:** Is an [OLGA](https://www.petromehras.com/petroleum-software-directory/production-engineering-software/olga-dynamic-multiphase-flow-simulator)
        extension file.
        * **.pkl:** Numpy arrays or Pandas.DataFrame saved in pickle format.

        ___
        **Parameters**

        * **:param pathname:** (str) Filename or directory. 
            * If the *pathname* is a directory, it will load all the files with extension *ext*.
            * If the *pathname* is a filename, it will load the file with a supported extension.
        * **:param ext:** (str) Filename extension; it's necessary if *pathname* is a directory.
        Extensions supported are:
            * *.tpl*  [OLGA](https://www.petromehras.com/petroleum-software-directory/production-engineering-software/olga-dynamic-multiphase-flow-simulator)
        extension file.
            * *.xls*
            * *.xlsx*
            * *.xlsm*
            * *.xlsb*
            * *.odf*
            * *.ods*
            * *.odt*
            * *.csv*
            * *.pkl* (Only if the pkl saved is a DataFrame)

        **:return:**

        * **data:** (pandas.DataFrame)

        ___
        ## Snippet code

        ```python
        >>> import os
        >>> from rackio_AI import RackioAI, get_directory
        >>> filename = os.path.join(get_directory('Leak'), 'Leak01.tpl')
        >>> df = RackioAI.load(filename)
        >>> print(df.head())
        tag      TIME_SERIES PT_SECTION_BRANCH_TUBERIA_PIPE_Pipe60_NR_1  ... CONTR_CONTROLLER_CONTROL_FUGA     file
        variable                                               Pressure  ...             Controller_output filename
        unit               S                                         PA  ...                                   .tpl
        0           0.000000                                   568097.3  ...                           0.0   Leak01
        1           0.502732                                   568098.2  ...                           0.0   Leak01
        2           1.232772                                   568783.2  ...                           0.0   Leak01
        3           1.653696                                   569367.3  ...                           0.0   Leak01
        4           2.200430                                   569933.5  ...                           0.0   Leak01
        <BLANKLINE>
        [5 rows x 12 columns]

        **Example loading a directory with .tpl files**

        >>> directory = os.path.join(get_directory('Leak'))
        >>> df = RackioAI.load(directory)
        >>> print(df.head())
        tag      TIME_SERIES PT_SECTION_BRANCH_TUBERIA_PIPE_Pipe60_NR_1  ... CONTR_CONTROLLER_CONTROL_FUGA     file
        variable                                               Pressure  ...             Controller_output filename
        unit               S                                         PA  ...                                   .tpl
        0           0.000000                                   568097.3  ...                           0.0   Leak01
        1           0.502732                                   568098.2  ...                           0.0   Leak01
        2           1.232772                                   568783.2  ...                           0.0   Leak01
        3           1.653696                                   569367.3  ...                           0.0   Leak01
        4           2.200430                                   569933.5  ...                           0.0   Leak01
        <BLANKLINE>
        [5 rows x 12 columns]

        **Example loading a directory with .csv files**

        >>> directory = os.path.join(get_directory('csv'), "Hysys")
        >>> df = RackioAI.load(directory, ext=".csv", _format="hysys")
        >>> print(df.head())
          (Time, [seconds]) (PIC-118 - PV, [kPa]) (PIC-118 - OP, [%]) (SPRDSHT-1 - Cell Matrix (G-16), []) (UIC-101 - OP, [%])
        1                 0               294.769                  42                              37.6105                  10
        2               0.3               294.769                  42                              37.6105                  10
        3               0.6               294.769                  42                              37.6105                  10
        4               0.9               294.769                  42                              37.6105                  10
        5               1.1               294.769                  42                              37.6105                  10

        >>> directory = os.path.join(get_directory('csv'), "VMGSim")
        >>> df = RackioAI.load(directory, ext=".csv", _format="vmgsim")
        >>> print(df.head())
          (time, s) (/Bed-1.In.MoleFlow, kmol/h) (/Bed-1.In.P, kPa)  ... (/Sep2.In.P, kPa) (/Sep3.In.P, kPa) (/Tail_Gas.In.T, C)
        1         1                  2072.582713        285.9299038  ...       315.8859771       291.4325134                 159
        2         2                  2081.622826        286.9027793  ...       315.8953772       292.3627861                 159
        3         3                   2085.98973        287.5966429  ...       316.0995398       293.0376745                 159
        4         4                  2089.323383        288.1380485  ...       316.3974799       293.5708836                 159
        5         5                  2092.214077         288.591646  ...       316.7350299       294.0200778                 159
        <BLANKLINE>
        [5 rows x 16 columns]

        **Example loading a .pkl with pandas.dataFrame**

        >>> filename = os.path.join(get_directory('pkl_files'), 'test_data.pkl')
        >>> df = RackioAI.load(filename)
        >>> print(df.head())
           Pipe-60 Totalmassflow_(KG/S)  Pipe-151 Totalmassflow_(KG/S)  Pipe-60 Pressure_(PA)  Pipe-151 Pressure_(PA)
        0                      37.83052                       37.83052               568097.3                352683.3
        1                      37.83918                       37.70243               568098.2                353449.8
        2                      37.83237                       37.67011               568783.2                353587.3
        3                      37.80707                       37.67344               569367.3                353654.8
        4                      37.76957                       37.69019               569933.5                353706.8

        ```
        """
        filename, ext = Utils.check_path(pathname, ext=ext)

        data = self.reader.read(filename, ext=ext, **kwargs)

        # Reset the index when it has duplicates or when explicitly requested
        if data.index.has_duplicates or reset_index:

            data = data.reset_index(drop=True)

        self.columns_name = Utils.get_column_names(data)

        self._data = data

        return data
    def split(self, *arrays, **options):
        """
        Split arrays or matrices into random train and test subsets

        **Parameters**

        * **:*arrays:** (sequence of indexables with the same length / shape[0]) Allowed inputs are lists, numpy arrays,
        scipy-sparse matrices or pandas DataFrame

        * **:train_size:** (float or int, default=None) If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the train split. If int, represents the absolute number of train samples. If None, the value
        is automatically set to the complement of the test size.

        * **:test_size:** (float or int, default=None) If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the
        value is set to the complement of the train size. If *train_size* is also None, it will be set to 0.30.

        * **:validation_size:** (float or int, default=None) If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the validation split. If int, represents the absolute number of validation samples. If None, the
        value is set to the complement of the train size and test size. If *train_size* is also None, it will be set to 0.0.

        * **:random_state:** (int or RandomState instance, default=None) Controls the shuffling applied to the data before
        applying the split. Pass an int for reproducible output across multiple function calls.
        See [Glossary](https://scikit-learn.org/stable/glossary.html#term-random-state)

        * **:shuffle:** (bool, default=False) Whether or not to shuffle the data before splitting. If shuffle=False then stratify
        must be None.

        * **:stratify:** (array-like, default=None) If not None, data is split in a stratified fashion, using this as the class labels.

        **:return:**

        * **splitting:** (list, length=3 * len(arrays)) list containing train-test split of inputs.

        ___

        ## Snippet code
        ```python
        >>> from rackio_AI import  RackioAI
        >>> import numpy as np
        >>> preprocess = RackioAI.get("Preprocessing", _type="Preprocessing")
        >>> X, y = np.arange(20).reshape((10, 2)), range(10)
        >>> X
        array([[ 0,  1],
               [ 2,  3],
               [ 4,  5],
               [ 6,  7],
               [ 8,  9],
               [10, 11],
               [12, 13],
               [14, 15],
               [16, 17],
               [18, 19]])
        >>> list(y)
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

        ```
        ## Snippet code 2
        ```python
        >>> X_train, X_test, X_validation, y_train, y_test, y_validation = preprocess.splitter.split(X, y, train_size=0.6, test_size=0.2, validation_size=0.2, random_state=0)
        >>> X_train
        array([[ 0,  1],
               [ 2,  3],
               [ 4,  5],
               [ 6,  7],
               [ 8,  9],
               [10, 11]])
        >>> X_test
        array([[12, 13],
               [14, 15]])
        >>> X_validation
        array([[16, 17],
               [18, 19]])
        >>> y_train
        [0, 1, 2, 3, 4, 5]
        >>> y_test
        [6, 7]
        >>> y_validation
        [8, 9]

        ```

        ## Snippet code 3
        ```python
        >>> X_train, X_test, y_train, y_test = preprocess.splitter.split(X, y, train_size=0.6, test_size=0.4, random_state=0)
        >>> X_train
        array([[ 0,  1],
               [ 2,  3],
               [ 4,  5],
               [ 6,  7],
               [ 8,  9],
               [10, 11]])
        >>> X_test
        array([[12, 13],
               [14, 15],
               [16, 17],
               [18, 19]])
        >>> y_train
        [0, 1, 2, 3, 4, 5]
        >>> y_test
        [6, 7, 8, 9]


        ```
        """
        default_options = {
            'train_size': None,
            'test_size': None,
            'validation_size': None,
            'random_state': None,
            'shuffle': False,
            'stratify': None
        }

        data = [
            array.values if isinstance(array, pd.DataFrame) else array
            for array in arrays
        ]
        options = Utils.check_default_kwargs(default_options, options)
        train_size = options['train_size']
        test_size = options['test_size']
        self.validation_size = options.pop('validation_size')
        lst = [train_size, test_size, self.validation_size]

        # Fall back to a plain train/test split when any size is undefined
        # or when train and test already cover the whole dataset
        if lst.count(None) >= 1 or (train_size + test_size == 1):

            return self.__split(TRAIN_TEST_SPLIT, *data, **options)

        return self.__split(TRAIN_TEST_VALIDATION_SPLIT, *data, **options)
    def split_sequences(self,
                        df: pd.DataFrame,
                        timesteps,
                        stepsize: int = 1,
                        input_cols: list = None,
                        output_cols: list = None,
                        maxlen=None,
                        dtype: str = 'int32',
                        padding: str = 'pre',
                        truncating: str = 'pre',
                        value: float = 0.):
        """
        Splits a dataframe into the 3D numpy array format supported by LSTM architectures, using the sliding-window concept.

        **Parameters**

        * **:param df:** (pandas.DataFrame) Contains inputs and outputs data
        * **:param timesteps:** (list or int) Timestep for each input variable.
            * If timesteps is an int value, all input columns will share the same timestep
            * If timesteps is a list, it must be the same length as the input_cols argument
        * **:param stepsize:** (int, default = 1) step size for the sliding window
        * **:param input_cols:** (list, default = None) Column names that represents the input variables to LSTM
            * If input_cols is None the method assumes that inputs are all column except the last one.
        * **:param output_cols:** (list, default = None) Column names that represents the output variables to LSTM
            * If output_cols is None the method assumes that output is the last column.

        The remaining parameters are forwarded to the *pad_sequences* method; see its description.

        **returns**

        * **x_sequences, y_sequences:** (3D numpy arrays) For stepsize=1, x_sequences has
        dimensions (df.shape[0] - max(timesteps) + 1, max(timesteps), len(input_cols)) and
        y_sequences has dimensions (df.shape[0] - max(timesteps) + 1, 1, len(output_cols)).

        ```python
        >>> import numpy as np
        >>> import pandas as pd
        >>> from rackio_AI import RackioAI
        >>> a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90]).reshape(-1,1)
        >>> b = np.array([15, 25, 35, 45, 55, 65, 75, 85, 95]).reshape(-1,1)
        >>> c = np.array([a[i]+b[i] for i in range(len(a))]).reshape(-1,1)
        >>> data = np.hstack((a,b,c))
        >>> data
        array([[ 10,  15,  25],
               [ 20,  25,  45],
               [ 30,  35,  65],
               [ 40,  45,  85],
               [ 50,  55, 105],
               [ 60,  65, 125],
               [ 70,  75, 145],
               [ 80,  85, 165],
               [ 90,  95, 185]])
        >>> df = pd.DataFrame(data, columns=['a', 'b', 'c'])
        >>> preprocess = RackioAI.get("Preprocessing", _type="Preprocessing")
        >>> x, y = preprocess.lstm_data_preparation.split_sequences(df, 2)
        >>> x.shape
        (8, 2, 2)
        >>> x
        array([[[10., 15.],
                [20., 25.]],
        <BLANKLINE>
               [[20., 25.],
                [30., 35.]],
        <BLANKLINE>
               [[30., 35.],
                [40., 45.]],
        <BLANKLINE>
               [[40., 45.],
                [50., 55.]],
        <BLANKLINE>
               [[50., 55.],
                [60., 65.]],
        <BLANKLINE>
               [[60., 65.],
                [70., 75.]],
        <BLANKLINE>
               [[70., 75.],
                [80., 85.]],
        <BLANKLINE>
               [[80., 85.],
                [90., 95.]]])
        >>> y.shape
        (8, 1, 1)
        >>> y
        array([[[ 45.]],
        <BLANKLINE>
               [[ 65.]],
        <BLANKLINE>
               [[ 85.]],
        <BLANKLINE>
               [[105.]],
        <BLANKLINE>
               [[125.]],
        <BLANKLINE>
               [[145.]],
        <BLANKLINE>
               [[165.]],
        <BLANKLINE>
               [[185.]]])

        ```
        """

        if not input_cols:

            input_cols = Utils.get_column_names(df)
            input_cols = input_cols[:-1]

        if not output_cols:

            output_cols = Utils.get_column_names(df)
            output_cols = [output_cols[-1]]

        if isinstance(timesteps, list):

            if not len(timesteps) == len(input_cols):

                raise ValueError(
                    'timesteps and input_cols arguments must be same length')

        else:

            timesteps = [timesteps] * len(input_cols)

        input_data = df.loc[:, input_cols].values
        output_data = df.loc[:, output_cols].values
        iteration = list(
            range(0, input_data.shape[0] - max(timesteps) + stepsize,
                  stepsize))

        self.x_sequences = np.zeros(
            (len(iteration), max(timesteps), len(input_cols)))
        self.y_sequences = np.zeros((len(iteration), 1, len(output_cols)))

        self.start = 0

        options = {
            'output_data': output_data,
            'input_data': input_data,
            'timesteps': timesteps,
            'maxlen': maxlen,
            'dtype': dtype,
            'padding': padding,
            'truncating': truncating,
            'value': value
        }

        self.__split_sequences(iteration, **options)

        return self.x_sequences, self.y_sequences