Python MBB Exemples, SunSpot.bb_methods.MBB Python Exemples

Exemple #1

0

Afficher le fichier

def _rnn_class_simulate(blocks, n_inst, wdw_length, scale):
    """
    Simulates labelled deviations for the recurrent classifier.

    The instances are generated three at a time: one block is resampled
    from 'blocks', a shift size is selected and a jump, a drift and an
    oscillation of that size are each applied to a copy of the block.

    Parameters
    ----------
    blocks : 2D-array
        Blocks of length wdw_length (one block per row), as returned by the
        block bootstrap.
    n_inst : int >= 0
        Number of instances requested.
    wdw_length : int > 0
        The length of the blocks.
    scale : float > 0
        The scale parameter of the normal distribution of the shift sizes.

    Returns
    -------
    X : 2D-array
        The simulated series. Only complete triplets are returned, hence
        the array contains 3 * (n_inst // 3) rows.
    S : 1D-array
        The shape of each deviation (0: jump, 1: drift, 2: oscillation).

    """
    n_filled = 3 * (n_inst // 3)  #complete triplets only
    X = np.zeros((n_filled, wdw_length))
    S = np.zeros((n_filled))
    #drawn with size n_inst to consume the random stream as before
    rnd_shifts = np.random.normal(0, scale, n_inst)
    time = np.arange(wdw_length)
    #randint needs an upper bound > 0, even for wdw_length < 4
    phi_max = max(1, wdw_length // 4)

    for first in range(0, n_filled, 3):

        shift = rnd_shifts[first]
        series = resample(blocks, replace=True, n_samples=1).flatten()

        ### jump (starting at a random position)
        jump = np.copy(series)
        delay = np.random.randint(0, wdw_length)
        jump[delay:] = jump[delay:] + shift

        ### drift (power-law with random exponent)
        power = np.random.uniform(1, 2)
        drift = shift / (100) * (time**power) + series

        ### oscillation (random frequency and phase)
        eta = np.random.uniform(np.pi / (2 * wdw_length),
                                2 * np.pi / wdw_length)
        phi = np.random.randint(0, phi_max)
        # NOTE(review): the oscillation multiplies the series here whereas
        # it is added to the series in feed_forward_reg and simple_svm --
        # confirm that this is intended.
        oscillation = np.sin(eta * np.pi * time + phi) * shift * series

        X[first] = jump
        X[first + 1] = drift
        X[first + 2] = oscillation
        S[first:first + 3] = (0, 1, 2)

    return X, S


def recurrent_class(data,
                    wdw_length,
                    scale=1,
                    n=50000,
                    n_neurons=[10],
                    n_epochs=30,
                    batch_size=50,
                    verbose=1):
    """
    Trains and validates a recurrent neural network (composed of one 
    recurrent hidden layer) for the monitoring.
    
    The network is trained to classify the shape of the deviations into 
    three different classes:
        - sudden jumps
        - more progressive drifts
        - oscillating shifts.

    Parameters
    ----------
    data : 2D-array
        in-control (IC) dataset (rows: time, columns: IC series).
    wdw_length : int > 0
        The length of the input data. 
    scale : float > 0, optional
        The scale parameter of the normal distribution that is used to simulate
        the size of the deviations. The default is 1.
    n : int >= 15, optional
        Number of training and validating instances. This value is 
        typically large. The default is 50000.
        One fifth of the instances is kept for the validation. Since the
        instances are simulated by groups of three, each set is rounded
        down to a multiple of three.
    n_neurons : list of int, optional
        Number of neurons in the hidden layer (list with a single element).
        The default is [10].
    n_epochs : int > 0, optional
        The number of epochs. The default is 30.
    batch_size : int > 0, optional
        The batch size. The default is 50.
    verbose : int, optional 
        Verbosity mode. 0=silent, 1=progress bar, 2=one line per epoch.
        The default is 1. 

    Returns
    -------
    model : (keras) model
        The trained neural network.
    scores : list
        The performances of the model.
        It contains the categorical cross-entropy (loss function) and the
        accuracy (metrics) of the network on the validation set.
    matrix : 2D-array (float)
        The confusion matrix of the classifier (rows: actual shapes, 
        columns: predicted shapes), normalized over the actual shapes.

    """
    wdw_length = int(wdw_length)
    assert wdw_length > 0, "wdw_length must be superior to zero"
    assert scale > 0, "scale must be superior to zero"
    n = int(n)
    assert n > 0, "n must be superior to zero"
    n_epochs = int(n_epochs)
    assert n_epochs > 0, "n_epochs must be superior to zero"
    batch_size = int(batch_size)
    assert batch_size > 0, "batch_size must be superior to zero"
    assert len(n_neurons) == 1, "n_neurons must be composed of 1 element"

    n_test = int(n / 5)  #n testing instances
    n_train = n - n_test  #n training instances
    #at least one triplet (jump, drift, oscillation) is needed for testing
    assert n_test >= 3, "n must be at least 15"
    blocks = bb.MBB(data, wdw_length)

    ### training and testing sets
    X_train, S_train = _rnn_class_simulate(blocks, n_train, wdw_length, scale)
    X_test, S_test = _rnn_class_simulate(blocks, n_test, wdw_length, scale)

    ### Neural network Architecture (classification)
    model = tf.keras.Sequential()  #linear stack of layers
    model.add(layers.SimpleRNN(n_neurons[0], input_dim=wdw_length))
    model.add(layers.Dense(3, activation='softmax'))  # three output classes
    if verbose > 0:
        model.summary()

    #compile model
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    #each series is presented as a single time step with wdw_length features
    S_train_hot = to_categorical(S_train, num_classes=3)  #hot encoding
    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    model.fit(X_train,
              S_train_hot,
              epochs=n_epochs,
              batch_size=batch_size,
              verbose=verbose)

    # make class predictions with the model
    S_test_hot = to_categorical(S_test, num_classes=3)
    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
    predictions = np.argmax(model.predict(X_test), axis=-1)
    scores = model.evaluate(X_test, S_test_hot, verbose=verbose)

    #confusion matrix: actual shapes first, predicted shapes second
    matrix = metrics.confusion_matrix(S_test.astype(int),
                                      predictions,
                                      labels=[0, 1, 2],
                                      normalize='true')

    return model, scores, matrix

Exemple #2

0

Afficher le fichier

def _ff_reg_simulate(blocks, n_inst, wdw_length, scale):
    """
    Simulates deviations of known sizes for the feed-forward regressor.

    The instances are generated three at a time: one block is resampled
    from 'blocks', a shift size is selected and a jump, a drift and an
    oscillation of that size are each added to a copy of the block.

    Parameters
    ----------
    blocks : 2D-array
        Blocks of length wdw_length (one block per row), as returned by the
        block bootstrap.
    n_inst : int >= 0
        Number of instances requested.
    wdw_length : int > 0
        The length of the blocks.
    scale : float > 0
        The scale parameter of the normal distribution of the shift sizes.

    Returns
    -------
    X : 2D-array
        The simulated series. Only complete triplets are returned, hence
        the array contains 3 * (n_inst // 3) rows.
    Y : 1D-array
        The size of the shift applied to each series.

    """
    n_filled = 3 * (n_inst // 3)  #complete triplets only
    X = np.zeros((n_filled, wdw_length))
    Y = np.zeros((n_filled))
    #drawn with size n_inst to consume the random stream as before
    rnd_shifts = np.random.normal(0, scale, n_inst)  #shift sizes
    time = np.arange(wdw_length)
    #randint needs an upper bound > 0, even for wdw_length < 4
    phi_max = max(1, wdw_length // 4)

    for first in range(0, n_filled, 3):

        shift = rnd_shifts[first]
        series = resample(blocks, replace=True, n_samples=1).flatten()

        ### jump (starting at a random position)
        jump = np.copy(series)
        delay = np.random.randint(0, wdw_length)
        jump[delay:] = jump[delay:] + shift

        ### drift (power-law with random exponent)
        power = np.random.uniform(1, 2)
        drift = shift / (500) * (time**power) + series

        ### oscillation (random frequency and phase)
        eta = np.random.uniform(np.pi / (2 * wdw_length),
                                2 * np.pi / wdw_length)
        phi = np.random.randint(0, phi_max)
        oscillation = np.sin(eta * np.pi * time + phi) * shift + series

        X[first] = jump
        X[first + 1] = drift
        X[first + 2] = oscillation
        Y[first:first + 3] = shift

    return X, Y


def feed_forward_reg(data,
                     wdw_length,
                     scale=1,
                     n=50000,
                     n_hidden=2,
                     n_neurons=[40, 20],
                     activation='sigmoid',
                     n_epochs=30,
                     batch_size=50,
                     verbose=1):
    """
    Trains and validates a feed-forward neural network for the monitoring.
    
    The network is trained to predict in a continuous range the size of the 
    deviations (i.e. they are designed for regression purposes). 

    Parameters
    ----------
    data : 2D-array
        in-control (IC) dataset (rows: time, columns: IC series).
    wdw_length : int > 0
        The length of the input data. 
    scale : float > 0, optional
        The scale parameter of the normal distribution that is used to simulate
        the size of the deviations. The default is 1.
    n : int >= 15, optional
        Number of training and validating instances. This value is 
        typically large. The default is 50000.
        One fifth of the instances is kept for the validation. Since the
        instances are simulated by groups of three, each set is rounded
        down to a multiple of three.
    n_hidden : int > 0, optional
        Number of hidden layers. The default is 2. 
    n_neurons : list of int, optional
        Number of neurons in each hidden layer. The default is 
        [40, 20].
    activation : string, optional
        The activation function. The default is 'sigmoid'.
        Other values are 'relu', 'softmax', 'softplus', 'tanh' etc. 
        (see keras documentation)
    n_epochs : int > 0, optional
        The number of epochs. The default is 30.
    batch_size : int > 0, optional
        The batch size. The default is 50.
    verbose : int, optional 
        Verbosity mode. 0=silent, 1=progress bar, 2=one line per epoch.
        The default is 1. 

    Returns
    -------
    model : (keras) model
        The trained neural network.
    scores : list
        The performances of the model on the validation set.
        It contains the mean squared error (loss function), the mean absolute
        error (metrics) and the mean absolute percentage error. 
        The latter compares the absolute values of the predictions and of
        the shifts, and is restricted to shifts larger than 0.1 in absolute
        value (nan when there is no such shift).

    """
    wdw_length = int(wdw_length)
    assert wdw_length > 0, "wdw_length must be superior to zero"
    assert scale > 0, "scale must be superior to zero"
    n = int(n)
    assert n > 0, "n must be superior to zero"
    n_hidden = int(n_hidden)
    assert n_hidden > 0, "n_hidden must be superior to zero"
    assert n_hidden == len(
        n_neurons), 'The neurons do not match the number of hidden layers'
    n_epochs = int(n_epochs)
    assert n_epochs > 0, "n_epochs must be superior to zero"
    batch_size = int(batch_size)
    assert batch_size > 0, "batch_size must be superior to zero"

    n_test = int(n / 5)  #n testing instances
    n_train = n - n_test  #n training instances
    #at least one triplet (jump, drift, oscillation) is needed for testing
    assert n_test >= 3, "n must be at least 15"
    blocks = bb.MBB(data, wdw_length)

    ### training and testing sets
    X_train, Y_train = _ff_reg_simulate(blocks, n_train, wdw_length, scale)
    X_test, Y_test = _ff_reg_simulate(blocks, n_test, wdw_length, scale)

    ### Neural network Architecture
    model = tf.keras.Sequential()  #linear stack of layers
    for i, n_units in enumerate(n_neurons):
        #the input size only has to be declared on the first layer
        first_layer = {'input_dim': wdw_length} if i == 0 else {}
        model.add(
            layers.Dense(n_units,
                         activation=activation,
                         use_bias=True,
                         **first_layer))
    model.add(layers.Dense(
        1,
        use_bias=True))  # no activation in the output layer since regression
    if verbose > 0:
        model.summary()

    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

    model.fit(X_train,
              Y_train,
              epochs=n_epochs,
              batch_size=batch_size,
              verbose=verbose)

    # make predictions with the model
    predictions = model.predict(X_test).reshape(-1)

    #the MAPE is unstable for shifts close to zero: they are left aside
    large = np.abs(Y_test) > 0.1
    if large.any():
        actual = np.abs(Y_test[large])
        mape = np.mean(
            np.abs((actual - np.abs(predictions[large])) / actual)) * 100
    else:
        mape = float('nan')
    scores = model.evaluate(X_test, Y_test, verbose=verbose)
    scores.append(mape)

    return model, scores

Exemple #3

0

Afficher le fichier

def _choice_C_alert_window(boot, first, wdw_length, k, L_plus, L_minus):
    """
    Returns the wdw_length observations that precede the first alert of a 
    two-sided CUSUM chart applied on 'boot' from index 'first'.

    When the chart gives no alert, the observations in
    [wdw_length, 2*wdw_length) are returned. When the positive and the
    negative charts give their first alert at the same time, the positive
    one is retained.
    """
    n_series = len(boot)

    #positive chart
    alert_plus = None
    c_plus = 0
    for i in range(first, n_series):
        c_plus = max(0, c_plus + boot[i] - k)
        if c_plus > L_plus:
            alert_plus = i
            break

    #negative chart
    alert_minus = None
    c_minus = 0
    for j in range(first, n_series):
        c_minus = min(0, c_minus + boot[j] + k)
        if c_minus < L_minus:
            alert_minus = j
            break

    #save first alert recorded
    if alert_minus is not None and (alert_plus is None
                                    or alert_minus < alert_plus):
        last = alert_minus + 1
    elif alert_plus is not None:
        last = alert_plus + 1
    else:
        last = 2 * wdw_length  #no alert
    return boot[last - wdw_length:last]


def _choice_C_simulate(data, blocks, n_blocks, block_length, BB_method,
                       n_inst, wdw_length, n_series, scale, delta_min, delay,
                       k, L_plus, L_minus, sign):
    """
    Simulates a set of deviations and the corresponding input vectors.

    The instances are generated three at a time: a series is resampled, a
    shift size is selected and a jump, a drift and an oscillation of that
    size are applied to copies of the series. Each input vector is then
    extracted with _choice_C_alert_window. The sign of the shifts
    alternates from one triplet to the next.

    Returns
    -------
    inputs : 2D-array
        The input vectors. Only complete triplets are returned, hence the
        array contains 3 * (n_inst // 3) rows.
    sizes : 1D-array
        The (signed) size of each shift.
    forms : 1D-array
        The form of each deviation (0: jump, 1: drift, 2: oscillation).
    sign : int
        The sign to be used for the next triplet.
    """
    n_filled = 3 * (n_inst // 3)  #complete triplets only
    inputs = np.zeros((n_filled, wdw_length))
    sizes = np.zeros((n_filled))
    forms = np.zeros((n_filled))
    #drawn with size n_inst to consume the random stream as before
    rnd = halfnorm(scale=scale).rvs(size=n_inst) + delta_min  #size of shifts
    time = np.arange(n_series)
    delay_rnd = 0

    for first in range(0, n_filled, 3):

        shift = rnd[first] * sign
        if BB_method == 'MABB':
            series = bb.resample_MatchedBB(data, block_length, n=n_series)
        else:
            series = resample(blocks, replace=True,
                              n_samples=n_blocks).flatten()[:n_series]

        #simulate a random delay
        if delay > 0:
            delay_rnd = np.random.randint(delay)

        for form in range(3):

            boot = np.copy(series)

            if form == 0:  #jump
                boot[wdw_length:] = boot[wdw_length:] + shift
            elif form == 1:  #drift
                power = np.random.uniform(1.5, 2)
                boot = shift / (n_series) * (time**power) + boot
            else:  #oscillation
                eta = np.random.uniform(np.pi / (wdw_length),
                                        3 * np.pi / wdw_length)
                # NOTE(review): the oscillation multiplies the series
                # although the documentation speaks of deviations "added on
                # top of the sample" -- confirm that this is intended.
                boot = np.sin(eta * np.pi * time) * shift * boot

            row = first + form
            forms[row] = form
            sizes[row] = shift
            #start the monitoring after random delay
            inputs[row, :] = _choice_C_alert_window(boot,
                                                    wdw_length + delay_rnd,
                                                    wdw_length, k, L_plus,
                                                    L_minus)
        sign = -sign

    return inputs, sizes, forms, sign


def choice_C(data,
             L_plus,
             delta_min,
             wdw_length,
             scale,
             start=1,
             stop=10,
             step=1,
             delay=0,
             L_minus=None,
             k=None,
             n=36000,
             n_series=500,
             epsilon=0.001,
             block_length=None,
             BB_method='MBB',
             confusion=False,
             verbose=True):
    """
    Selects an appropriate value for the regularization parameter (C) of the 
    svm procedures. 
    
    The procedure is implemented as follows.
    For each value of C, the regressor and classifier are trained and validated
    (on freshly simulated training and validating sets).
    Then, the values of C that maximize/minimize different performance 
    measures are returned. 
    The training (and validating) procedure works as explained below.
    For each monte-carlo run, a new series of observations is sampled from the 
    IC data using a block boostrap procedure.
    A shift size is then sampled from a halfnormal distribution (supported by 
    [delta_min, +inf]) with a specified scale parameter.
    A jump, an oscillating shift and a drift of previous size 
    are then applied to the sample to create artificial deviations. 
    A two-sided CUSUM chart is run on each series and the wdw_length 
    observations that precede the first alert form the input vector.
    The classifer is then trained to recognize the form of deviations among the 
    three general classes: 'jump', 'drift' or 'oscillation' whereas
    the regressor learns to predict the shift sizes in a continuous range.
    
    Once the learning is finished, a validation step is also applied on 
    unseen deviations to evaluate the performances of the svr and svc. 
    Three criteria are computed: the mean absolute percentage
    error (MAPE), the mean squared error (MSE) and the accuracy.
    
    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
    L_plus : float 
        Value for the positive control limit.
    delta_min : float > 0
        The target minimum shift size. 
    wdw_length : int > 0
        The length of the input vector.
    scale : float > 0
         The scale parameter of the halfnormal distribution 
         (similar to the variance of a normal distribution). 
         A typical range of values for scale is [1,4], depending on the size
         of the actual deviations
    start : float > 0, optional
        Starting value for C. Default is 1.
    stop : float > 0, optional
        Stopping value for C (excluded). Default is 10.
    step : float > 0, optional
        Step value for C. The function tests the values of C returned by 
        numpy.arange(start, stop, step), i.e. the values in [start, stop) 
        with step value equal to 'step'. Default is 1.
    delay : int, optional
        Flag to start the chart after a delay randomly selected from the
        interval [0, delay). Default is 0 (no delay). 
    L_minus :  float, optional
        Value for the negative control limit. Default is None. 
        When None, L_minus = - L_plus. 
    k : float, optional
        The allowance parameter. The default is None. 
        When None, k = delta_min/2 (optimal formula for iid normal data).
    n : int >= 15, optional      
        Number of training and validating instances. This value is 
        typically large. Default is 36000.
        n is rounded up to a multiple of three and one fifth of the 
        instances is kept for the validation. Since the instances are 
        simulated by groups of three, each set is rounded down to a 
        multiple of three.
    n_series : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 500. 
    epsilon : float, optional
        Parameter of the svr, which represents the approximation accuracy. 
        Default is 0.001.
    block_length :  int > 0, optional
        The length of the blocks. Default is None. 
        When None, the length is computed using an optimal formula. 
    BB_method : str, optional
       String that designates the block boostrap method chosen for sampling data. 
       Values for the string should be selected among: 
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
    confusion : bool, optional 
        Flag to show the confusion matrix (measure of the classification accuracy, 
        class by class). Default is False.  
    verbose : bool, optional    
        Flag to print infos about C. Default is True.
          
    Returns 
    ------
    min_MAPE : float > 0
        The value of C that minimizes the MAPE (mean absolute percentage error).
    min_MSE : float > 0
        The value of C that minimizes the MSE (mean squared error).
    max_accuracy : float > 0
        The value of C that maximizes the accuracy.
        
    """
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    #the matched block bootstrap resamples the series directly (no blocks)
    blocks = None
    n_blocks = None
    if BB_method == 'MBB':
        blocks = bb.MBB(data, block_length)
    elif BB_method == 'NBB':
        blocks = bb.NBB(data, block_length)
    elif BB_method == 'CBB':
        blocks = bb.CBB(data, block_length)

    if blocks is not None:
        n_blocks = int(np.ceil(n_series / blocks.shape[1]))

    wdw_length = int(np.ceil(wdw_length))  #should be integer

    delay = int(delay)
    n = int(n)
    assert n > 0, "n must be strictly positive"
    n += (-n) % 3  #n should be multiple of 3

    if L_minus is None:
        L_minus = -L_plus
    if k is None:
        k = delta_min / 2

    sign = 1
    n_test = int(n / 5)  #n testing instances
    n_train = n - n_test  #n training instances
    #at least one triplet (jump, drift, oscillation) is needed for testing
    assert n_test >= 3, "n must be at least 15"

    C_values = np.arange(start, stop, step)
    assert len(C_values) > 0, "no value of C in [start, stop)"
    MAPE = np.zeros((len(C_values)))
    MSE = np.zeros((len(C_values)))
    accuracy = np.zeros((len(C_values)))

    for count, C in enumerate(C_values):

        ### training
        input_train, size_train, form_train, sign = _choice_C_simulate(
            data, blocks, n_blocks, block_length, BB_method, n_train,
            wdw_length, n_series, scale, delta_min, delay, k, L_plus, L_minus,
            sign)

        ### train the models
        regressor = SVR(C=C, epsilon=epsilon)
        regressor.fit(input_train, size_train)
        clf = svm.SVC(C=C)
        clf.fit(input_train, form_train)

        ### testing
        input_test, label_test, form_test, sign = _choice_C_simulate(
            data, blocks, n_blocks, block_length, BB_method, n_test,
            wdw_length, n_series, scale, delta_min, delay, k, L_plus, L_minus,
            sign)

        ### compute accuracy and other precision measures
        label_pred = regressor.predict(input_test)
        label_pred_clf = clf.predict(input_test)

        #regressor (the MAPE compares the sizes without their signs)
        MAPE[count] = np.mean(
            np.abs((np.abs(label_test) - np.abs(label_pred)) /
                   np.abs(label_test))) * 100
        MSE[count] = np.mean((label_test - label_pred)**2)
        #classifier
        accuracy[count] = np.mean(label_pred_clf == form_test) * 100

        ### compute the confusion matrix
        if confusion:
            class_names = ['jump', 'drift', 'oscill.']
            titles_options = [("Confusion matrix, without normalization",
                               None), ("Normalized confusion matrix", 'true')]
            for title, normalize in titles_options:
                disp = plot_confusion_matrix(clf,
                                             input_test,
                                             form_test,
                                             display_labels=class_names,
                                             cmap=plt.cm.Blues,
                                             normalize=normalize)
                disp.ax_.set_title(title)
                print(title)
                print(disp.confusion_matrix)
            plt.show()

    min_MAPE = C_values[np.argmin(MAPE)]
    min_MSE = C_values[np.argmin(MSE)]
    max_accuracy = C_values[np.argmax(accuracy)]

    if verbose:
        print('C value that minimizes the MAPE:', min_MAPE)
        print('C value that minimizes the MSE:', min_MSE)
        print('C value that maximizes the accuracy:', max_accuracy)

    return min_MAPE, min_MSE, max_accuracy

Exemple #4

0

Afficher le fichier

def _simple_svm_simulate(blocks, n_inst, wdw_length, scale):
    """
    Simulates a set of labelled deviations for the svm procedures.

    The instances are generated three at a time: one block is resampled
    from 'blocks', a shift size is selected and a jump, a drift and an
    oscillation of that size are each added to a copy of the block.

    Returns
    -------
    inputs : 2D-array
        The simulated series. Only complete triplets are returned, hence
        the array contains 3 * (n_inst // 3) rows.
    sizes : 1D-array
        The size of the shift applied to each series.
    forms : 1D-array
        The form of each deviation (0: jump, 1: drift, 2: oscillation).
    """
    n_filled = 3 * (n_inst // 3)  #complete triplets only
    inputs = np.zeros((n_filled, wdw_length))
    sizes = np.zeros((n_filled))
    forms = np.zeros((n_filled))
    #drawn with size n_inst to consume the random stream as before
    rnd = np.random.normal(0, scale, n_inst)  #size of shifts
    time = np.arange(wdw_length)
    #randint needs an upper bound > 0, even for wdw_length < 4
    phi_max = max(1, wdw_length // 4)

    for first in range(0, n_filled, 3):

        shift = rnd[first]
        series = resample(blocks, replace=True, n_samples=1).flatten()

        ### jump (starting at a random position)
        jump = np.copy(series)
        delay = np.random.randint(0, wdw_length)
        jump[delay:] = jump[delay:] + shift

        ### drift (power-law with random exponent)
        power = np.random.uniform(1, 2)
        drift = shift / (500) * (time**power) + series

        ### oscillation (random frequency and phase)
        eta = np.random.uniform(np.pi / (2 * wdw_length),
                                2 * np.pi / wdw_length)
        phi = np.random.randint(0, phi_max)
        oscillation = np.sin(eta * np.pi * time + phi) * shift + series

        inputs[first, :] = jump
        inputs[first + 1, :] = drift
        inputs[first + 2, :] = oscillation
        sizes[first:first + 3] = shift
        forms[first:first + 3] = (0, 1, 2)

    return inputs, sizes, forms


def simple_svm(data,
               wdw_length,
               scale,
               n=50000,
               C=1.0,
               epsilon=0.001,
               kernel='rbf',
               degree=3,
               precision=True,
               confusion=True):
    """
    Trains the support vector machine classifier (svc) and regressor (svr).
    
    For each monte-carlo run, a new block of observations is sampled from the 
    IC data using a block boostrap procedure.
    A shift size is then sampled from a normal distribution with a 
    specified scale parameter.
    A jump, an oscillating shift (with random frequency) and a drift (with 
    random power-law functions in the range [1,2]) of previous size 
    are then added on top of the sample to create artificial deviations. 
    The classifer is then trained to recognize the form of deviations among the 
    three general classes: 'jump', 'drift' or 'oscillation' whereas 
    the regressor learns to predict the shift sizes in a
    continuous range. 
    Once the learning is finished, a validation step is also applied on 
    unseen deviations to evaluate the performances of the svr and svc.
    Three criteria are computed: the mean absolute percentage error
    (MAPE), the mean squared error (MSE) and the accuracy.
    
    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
    wdw_length : int > 0
        The length of the input vector.
    scale : float > 0
         The standard deviation of the normal distribution. 
         A typical range of values for scale is [1,4], depending on the size
         of the actual deviations
    n : int >= 15, optional      
        Number of training and validating instances. This value is 
        typically large. Default is 50000.
        n is rounded up to a multiple of three and one fifth of the 
        instances is kept for the validation. Since the instances are 
        simulated by groups of three, each set is rounded down to a 
        multiple of three.
    C : float > 0, optional
        Regularization parameter of the svr and svc (the strength of the 
        regularization is inversely proportional to C).
        Default is 1. Typical range is [1, 10].
    epsilon : float, optional
        Parameter of the svr, which represents the approximation accuracy. 
        Default is 0.001.
    kernel : str, optional
        The kernel function to be used in the svm procedures. 
        Values should be selected among: 'rbf', 'linear', 'sigmoid' and 'poly'. 
        Default is 'rbf'.
    degree : int > 0, optional
        The degree of the polynomial kernel. Only used when kernel='poly'.
        Default is 3.
    precision : bool, optional    
        Flag to print accuracy measures. Default is True.
    confusion : bool, optional 
        Flag to show the confusion matrix (measure of the classification accuracy, 
        class by class). Default is True.        
          
    Returns 
    ------
    regressor : support vector regression model
        The trained regressor.
    clf : support vector classification model
        The trained classifier.
        
    """
    wdw_length = int(np.ceil(wdw_length))  #should be integer
    blocks = bb.MBB(data, wdw_length)

    n = int(n)
    assert n > 0, "n must be strictly positive"
    n += (-n) % 3  #n should be multiple of 3
    #cast first, so that a degree in (0, 1) is not truncated to zero
    degree = int(degree)
    assert degree > 0, "degree must be strictly positive"
    n_test = int(n / 5)  #n testing instances
    n_train = n - n_test  #n training instances
    #at least one triplet (jump, drift, oscillation) is needed for testing
    assert n_test >= 3, "n must be at least 15"

    ### training
    input_train, size_train, form_train = _simple_svm_simulate(
        blocks, n_train, wdw_length, scale)

    ### train the models
    regressor = SVR(C=C, epsilon=epsilon, kernel=kernel, degree=degree)
    regressor.fit(input_train, size_train)
    clf = svm.SVC(C=C, kernel=kernel, degree=degree)
    clf.fit(input_train, form_train)

    ### testing
    input_test, label_test, form_test = _simple_svm_simulate(
        blocks, n_test, wdw_length, scale)

    ### compute accuracy and other precision measures
    label_pred = regressor.predict(input_test)
    label_pred_clf = clf.predict(input_test)

    if precision:
        #regressor
        MAPE = np.mean(np.abs((label_test - label_pred) / label_test)) * 100
        MSE = np.mean((label_test - label_pred)**2)
        print('MAPE =', MAPE)
        print('MSE =', MSE)

        #same measures on the sizes only (signs are dropped)
        abs_pred = np.abs(label_pred)
        abs_test = np.abs(label_test)
        MAPE = np.mean(np.abs((abs_test - abs_pred) / abs_test)) * 100
        MSE = np.mean((abs_test - abs_pred)**2)
        print('MAPE without signs =', MAPE)
        print('MSE without signs =', MSE)

        #classifier
        accuracy = np.mean(label_pred_clf == form_test) * 100
        print('Accuracy =', accuracy)

    ### compute the confusion matrix
    if confusion:
        class_names = ['jump', 'drift', 'oscill.']
        titles_options = [("Confusion matrix, without normalization", None),
                          ("Normalized confusion matrix", 'true')]
        for title, normalize in titles_options:
            disp = plot_confusion_matrix(clf,
                                         input_test,
                                         form_test,
                                         display_labels=class_names,
                                         cmap=plt.cm.Blues,
                                         normalize=normalize)
            disp.ax_.set_title(title)
            print(title)
            print(disp.confusion_matrix)
            #entry (oscillation, drift) divided by the number of test instances
            print(disp.confusion_matrix[2, 1] / len(form_test))
        plt.show()

    return (regressor, clf)

Exemple #5

0

Afficher le fichier

def _first_alert_window(boot, n_series, wdw_length, start, k, L_plus,
                        L_minus):
    """
    Returns the 'wdw_length' observations that end at the first CUSUM alert.

    An upper and a lower one-sided CUSUM chart are run on boot[start:n_series]
    (both statistics start at zero). The window that ends at the earliest
    alert is returned. When no chart signals, the default window
    boot[wdw_length:2*wdw_length] is returned.
    """
    boot = np.asarray(boot, dtype=float)

    alert_plus = None
    c_plus = 0.0
    for i in range(start, n_series):
        c_plus = max(0, c_plus + boot[i] - k)
        if c_plus > L_plus:
            alert_plus = i
            break

    alert_minus = None
    c_minus = 0.0
    for j in range(start, n_series):
        c_minus = min(0, c_minus + boot[j] + k)
        if c_minus < L_minus:
            alert_minus = j
            break

    # explicit alert times (instead of comparing loop counters) so that an
    # alert on the very last observation, or an empty monitoring range,
    # is handled correctly
    if alert_minus is not None and (alert_plus is None
                                    or alert_minus < alert_plus):
        end = alert_minus + 1
    elif alert_plus is not None:
        end = alert_plus + 1
    else:
        return boot[wdw_length:2 * wdw_length]  #default is not alert
    return boot[end - wdw_length:end]


def _simulate_deviations(n_rows, sign, draw_series, n_series, wdw_length,
                         scale, delta_min, delay, k, L_plus, L_minus):
    """
    Simulates 'n_rows' labelled input vectors (n_rows: multiple of 3).

    Each resampled series is used three times: with a jump (form 0), a
    drift (form 1) and an oscillating shift (form 2) of the same size.
    The sign of the shift alternates from one series to the next.

    Returns the input vectors, the shift sizes, the forms and the sign
    to be used for the next series.
    """
    inputs = np.zeros((n_rows, wdw_length))
    sizes = np.zeros((n_rows))
    forms = np.zeros((n_rows))
    #size of shifts (only one value out of three is used, as before)
    magnitudes = halfnorm(scale=scale).rvs(size=n_rows) + delta_min
    time = np.arange(n_series)
    delay_rnd = 0
    for first_row in range(0, n_rows - 2, 3):
        shift = magnitudes[first_row] * sign
        series = draw_series()

        #simulate a random delay, drawn from {0, ..., delay-1}
        if delay > 0:
            delay_rnd = np.random.randint(delay)

        for form in range(3):
            boot = np.copy(series)
            if form == 0:  #jump
                boot[wdw_length:] = boot[wdw_length:] + shift
            elif form == 1:  #drift
                power = np.random.uniform(1.5, 2)
                boot = shift / (n_series) * (time**power) + boot
            else:  #oscillating shift
                eta = np.random.uniform(np.pi / (wdw_length),
                                        3 * np.pi / wdw_length)
                # NOTE(review): the sine wave multiplies the series whereas
                # the jump and the drift are added to it, and eta is
                # multiplied by pi once more. Kept as is -- confirm that
                # this is intended.
                boot = np.sin(eta * np.pi * time) * shift * boot

            row = first_row + form
            forms[row] = form
            sizes[row] = shift
            #start the monitoring after the random delay
            inputs[row, :] = _first_alert_window(boot, n_series, wdw_length,
                                                 wdw_length + delay_rnd, k,
                                                 L_plus, L_minus)
        sign = -sign

    return inputs, sizes, forms, sign


def training_svm(data,
                 L_plus,
                 delta_min,
                 wdw_length,
                 scale,
                 delay=0,
                 L_minus=None,
                 k=None,
                 n=63000,
                 n_series=500,
                 C=1.0,
                 epsilon=0.001,
                 kernel='rbf',
                 degree=3,
                 block_length=None,
                 BB_method='MBB',
                 precision=True,
                 confusion=True):
    """
    Trains the support vector machine classifier (svc) and regressor (svr).
    
    The training (and validating) procedure works as follows.
    For each monte-carlo run, a new series of observations is sampled from the 
    IC data using a block bootstrap procedure.
    A shift size is then sampled from a halfnormal distribution (supported by 
    [delta_min, +inf]) with a specified scale parameter.
    A jump, a drift (power-law function with a random exponent in [1.5, 2])
    and an oscillating shift of that size are then simulated on the sample
    to create artificial deviations.
    A two-sided CUSUM chart is applied on each deviating series and the
    'wdw_length' observations preceding the first alert form the input vector.
    The classifier is then trained to recognize the form of deviations among 
    the three general classes: 'jump', 'drift' or 'oscillation' whereas 
    the regressor learns to predict the shift sizes in a continuous range. 
    Once the learning is finished, a validation step is also applied on 
    unseen deviations to evaluate the performances of the svr and svc.
    Three criteria are computed: the mean absolute percentage error
    (MAPE), the mean squared error (MSE) and the accuracy.
    
    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
    L_plus : float 
        Value for the positive control limit.
    delta_min : float > 0
        The target minimum shift size. 
    wdw_length : int > 0
        The length of the input vector (rounded up to an integer).
        n_series should be at least 2*wdw_length.
    scale : float > 0
         The scale parameter of the halfnormal distribution 
         (similar to the variance of a normal distribution). 
         A typical range of values for scale is [1,4], depending on the size
         of the actual deviations
    delay : int, optional
        When positive, the chart is started after a delay randomly selected
        among 0, ..., delay-1. Default is 0 (no delay). 
    L_minus :  float, optional
        Value for the negative control limit. Default is None. 
        When None, L_minus = - L_plus. 
    k : float, optional
        The allowance parameter. The default is None. 
        When None, k = delta_min/2 (optimal formula for iid normal data).
    n : int > 0, optional      
        Number of training and validating instances, rounded up to a 
        multiple of three (three forms per series). About one fifth of the
        instances (a multiple of three, at least three) is kept for the
        validation. Default is 63000.
    n_series : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 500. 
    C : float > 0, optional
        Regularization parameter of the svr and svc (the strength of the 
        regularization is inversely proportional to C).
        Default is 1. Typical range is [1, 10].
    epsilon : float, optional
        Parameter of the svr, which represents the approximation accuracy. 
        Default is 0.001.
    kernel : str, optional
        The kernel function to be used in the svm procedures. 
        Values should be selected among: 'rbf', 'linear', 'sigmoid' and 'poly'. 
        Default is 'rbf'.
    degree : int > 0, optional
        The degree of the polynomial kernel. Only used when kernel='poly'.
        Default is 3.
    block_length :  int > 0, optional
        The length of the blocks. Default is None. 
        When None, the length is computed using an optimal formula. 
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling 
       data. Values for the string should be selected among: 
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
    precision : bool, optional    
        Flag to print accuracy measures. Default is True.
    confusion : bool, optional 
        Flag to show the confusion matrix (measure of the classification 
        accuracy, class by class). Default is True.        
          
    Returns 
    ------
    regressor : support vector regression model
        The trained regressor.
    clf : support vector classification model
        The trained classifier.
        
    """
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    if BB_method == 'MABB':

        def draw_series():
            return bb.resample_MatchedBB(data, block_length, n=n_series)
    else:
        blocks = getattr(bb, BB_method)(data, block_length)
        n_blocks = int(np.ceil(n_series / blocks.shape[1]))

        def draw_series():
            return resample(blocks, replace=True,
                            n_samples=n_blocks).flatten()[:n_series]

    wdw_length = int(np.ceil(wdw_length))  #should be integer

    delay = int(delay)
    n = int(n)
    assert n > 0, "n must be strictly positive"
    n += -n % 3  #n should be multiple of 3

    if L_minus is None:
        L_minus = -L_plus
    if k is None:
        k = delta_min / 2

    assert degree > 0, "degree must be strictly positive"
    degree = int(degree)

    # Both sets contain a multiple of three instances: every resampled series
    # yields exactly three rows, so no row is left at its zero initial value
    # (a zero label would also make the MAPE divide by zero).
    n_test = max(3, 3 * (n // 15))  #n testing instances
    n_train = n - n_test  #n training instances

    ### training
    input_train, size_train, form_train, sign = _simulate_deviations(
        n_train, 1, draw_series, n_series, wdw_length, scale, delta_min,
        delay, k, L_plus, L_minus)

    ### train the models
    regressor = SVR(C=C, epsilon=epsilon, kernel=kernel, degree=degree)
    regressor.fit(input_train, size_train)
    clf = svm.SVC(C=C, kernel=kernel, degree=degree)
    clf.fit(input_train, form_train)

    ### testing (the sign alternation continues from the training phase)
    input_test, label_test, form_test, sign = _simulate_deviations(
        n_test, sign, draw_series, n_series, wdw_length, scale, delta_min,
        delay, k, L_plus, L_minus)

    ### compute accuracy and other precision measures
    label_pred = regressor.predict(input_test)
    label_pred_clf = clf.predict(input_test)

    if precision:
        #regressor: errors with, then without, the sign of the shifts
        for suffix, truth, pred in (('', label_test, label_pred),
                                    (' without signs', abs(label_test),
                                     abs(label_pred))):
            MAPE = np.mean(np.abs((truth - pred) / truth)) * 100
            MSE = np.mean((truth - pred)**2)
            print('MAPE' + suffix + ' =', MAPE)
            print('MSE' + suffix + ' =', MSE)

        #classifier
        accuracy = np.mean(label_pred_clf == form_test) * 100
        print('Accuracy =', accuracy)

    ### compute the confusion matrix
    if confusion:
        # NOTE(review): presumably sklearn.metrics.plot_confusion_matrix,
        # which was removed in scikit-learn 1.2 in favour of
        # ConfusionMatrixDisplay.from_estimator -- confirm against the
        # imports of the file.
        class_names = ['jump', 'drift', 'oscill.']
        titles_options = [("Confusion matrix, without normalization", None),
                          ("Normalized confusion matrix", 'true')]
        for title, normalize in titles_options:
            disp = plot_confusion_matrix(clf,
                                         input_test,
                                         form_test,
                                         display_labels=class_names,
                                         cmap=plt.cm.Blues,
                                         normalize=normalize)
            disp.ax_.set_title(title)
            print(title)
            print(disp.confusion_matrix)
            #share of oscillations classified as drifts
            print(disp.confusion_matrix[2, 1] / n_test)
        plt.show()

    return (regressor, clf)

Exemple #6

0

Afficher le fichier

def input_vector_length(data,
                        delta_min,
                        L_plus,
                        L_minus=None,
                        k=None,
                        nmc=4000,
                        n=2000,
                        qt=None,
                        block_length=None,
                        BB_method='MBB',
                        plot=False):
    """ 
    Selects the length of the input vector of the support vector machines.
    
    The input vector gathers the past observations that are passed to the
    classifier and the regressor after each alert of the chart. It should be
    long enough to contain most of the shift, including the smallest ones 
    (which are detected late), without being needlessly large. The length is
    thus chosen as an upper quantile of the run length distribution of the 
    chart, when the data are shifted by the smallest size to be detected.
    
    For each monte-carlo run, a series is resampled from the IC data with a 
    block bootstrap procedure and shifted by 'delta_min'. The first alerts 
    of the upper and lower CUSUM charts are recorded (a chart without alert 
    is given the run length 'n') and combined as 
    RL = 1/(1/RL_minus + 1/RL_plus). 
    The returned length is the quantile 'qt' of these run lengths. When 
    'qt' is None, the quantile is located at the 'knee' of the curve of the
    quantiles 1, 0.95, ..., 0.55.
    
    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
    delta_min : float >= 0
        The target minimum shift size. 
    L_plus : float 
        Value for the positive control limit.
    L_minus : float, optional
        Value for the negative control limit. Default is None. 
        When None, L_minus = - L_plus. 
    k : float, optional
        The allowance parameter. The default is None. 
        When None, k = delta_min/2 (optimal formula for iid normal data).
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000. 
    n : int >= 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 2000. 
    qt :  float in [0,1], optional
        Quantile of the run length distribution (used to select an appropriate
        input vector length). Default is None. 
        When None, the appropriate quantile is selected with a knee locator. 
    block_length :  int > 0, optional
        The length of the blocks. Default is None. 
        When None, the length is computed using an optimal formula. 
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling 
       data. Values for the string should be selected among: 
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
    plot :  bool, optional 
        Flag to show the histogram of the run length distribution (and the 
        quantile curve when the knee locator is used). Default is False.
         
    Returns
    -------
    m : int
        The length of the input vector.
        
    """
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    #the matched block bootstrap resamples the data directly (no blocks)
    block_builders = {'MBB': bb.MBB, 'NBB': bb.NBB, 'CBB': bb.CBB}
    if BB_method in block_builders:
        blocks = block_builders[BB_method](data, block_length)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    k = delta_min / 2 if k is None else k
    L_minus = -L_plus if L_minus is None else L_minus
    n = int(n)
    assert n >= 0, "n must be superior or equal to zero"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"

    def run_length(series, update, signals):
        #time of the first alert of a one-sided chart (n when no alert)
        stat = np.zeros((n, 1))
        for t in range(1, n):
            stat[t] = update(stat[t - 1], series[t])
            if signals(stat[t]):
                return t
        return n

    def upper_update(previous, obs):
        return max(0, previous + obs - k)

    def lower_update(previous, obs):
        return min(0, previous + obs + k)

    def above_limit(value):
        return value > L_plus

    def below_limit(value):
        return value < L_minus

    rl_plus = np.full((nmc, 1), np.nan)
    rl_minus = np.full((nmc, 1), np.nan)
    for run in range(nmc):

        #sample data with BB and shift them by delta_min
        if BB_method == 'MABB':
            sample = bb.resample_MatchedBB(data, block_length, n=n)
        else:
            sample = resample(blocks, replace=True,
                              n_samples=n_blocks).flatten()[:n]
        shifted = sample + delta_min

        rl_plus[run] = run_length(shifted, upper_update, above_limit)
        rl_minus[run] = run_length(shifted, lower_update, below_limit)

    RL = (1 / (rl_minus) + 1 / (rl_plus))**(-1)
    observed = RL[~np.isnan(RL)]

    if plot:
        plt.figure(1)
        plt.hist(observed,
                 range=[-4, 100],
                 bins='auto',
                 density=True,
                 facecolor='b')
        plt.title("Run length distribution")
        plt.axis([-4, 100, 0, 0.2])
        plt.grid(True)
        plt.show()

    if qt is not None:
        ### select m with a specified quantile
        return int(np.quantile(observed, qt))

    ### select m with knee locator
    levels = np.arange(1, 0.5, -0.05)
    run_lengths = np.array([np.quantile(observed, q) for q in levels])
    if plot:
        plt.plot(levels, run_lengths)
        plt.xlabel('quantile')
        plt.ylabel('run length')
        plt.title('Run length at different quantiles')
        plt.show()

    #shape of the quantile curve, from linear and quadratic fits
    slope = np.polyfit(levels, run_lengths, deg=1)[0]
    curvature = np.polyfit(levels, run_lengths, deg=2)[0]
    curve = 'concave' if curvature < 0 else 'convex'
    direction = 'decreasing' if slope < 0 else 'increasing'

    locator = KneeLocator(levels,
                          run_lengths,
                          curve=curve,
                          direction=direction)
    return int(np.quantile(observed, locator.knee))

Exemple #7

0

Afficher le fichier

# Rebuild the classifier from its JSON description (read earlier in the script)
classifier = model_from_json(loaded_model_json)
# load the trained weights into the rebuilt model
classifier.load_weights("nn_models/rnn_classification_27.h5")

#========================================================================
### Compute the predictions (sizes and shapes) of the networks
#=======================================================================

### index of the first station named 'UC' (IndexError if there is none)
stat = [i for i in range(len(station_names)) if station_names[i] == 'UC'][0]

### separate the data from the selected station into blocks
### (one row per observation; the first block_length-1 rows stay NaN)
blocks = np.zeros((n_obs, block_length))
blocks[:] = np.nan
# NOTE(review): presumably NaN=True / all_NaN=False keep the blocks that are
# only partly missing -- confirm against bb.MBB
blocks[block_length - 1:, :] = bb.MBB(data[:, stat].reshape(-1, 1),
                                      block_length,
                                      NaN=True,
                                      all_NaN=False)

### interpolate NaNs in input vectors
### (presumably 'ind' holds the row indices of the returned vectors -- confirm
### against svm.fill_nan)
input_valid, ind = svm.fill_nan(blocks)
### reshape input vectors to 3D: (number of vectors, 1, vector length)
input_valid = np.reshape(input_valid,
                         (input_valid.shape[0], 1, input_valid.shape[1]))

### apply classifier and regressor on (filled-up) input vectors
### predictions are NaN wherever no prediction is made
size_pred = np.zeros((n_obs, 1))
size_pred[:] = np.nan
shape_pred = np.zeros((n_obs))
shape_pred[:] = np.nan
if len(ind) > 0:  #at least one value
    size_pred[ind] = regressor.predict(input_valid)

Exemple #8

0

Afficher le fichier

def _cusum_shift_estimate(series, n, k, limit, gap):
    """
    Returns the shift size estimated at the first alert of an upper CUSUM.

    The statistic starts at zero and is updated on series[1:n]. A missing
    observation propagates the statistic during at most 'gap' consecutive
    missing values; afterwards the statistic is reset to zero. 
    At the first alert (statistic > limit), the estimate is 
    k + statistic/(number of obs. since the statistic was last at zero). 
    NaN is returned when the chart never signals.
    """
    stat = 0.0
    last_zero = 0  #the statistic is zero at index 0
    n_missing = 0
    for t in range(1, n):
        obs = series[t]
        if not np.isnan(obs):
            stat = max(0, stat + obs - k)
            n_missing = 0
        elif n_missing < gap:
            n_missing += 1  #small gap: the statistic is propagated
        else:
            stat = 0  #large gap: the statistic is reset
        if stat > limit:
            return k + stat / (t - last_zero)
        if stat == 0:
            last_zero = t
    return np.nan


def shifts_montgomery(data,
                      L_plus,
                      L_minus=None,
                      delta=1.5,
                      k=None,
                      nmc=4000,
                      n=2000,
                      two_sided=True,
                      block_length=None,
                      missing_values='omit',
                      gap=0,
                      BB_method='MBB'):
    """ 
    Estimates the shift sizes of the data with an optimal formula. 
    
    The sizes of the shifts are estimated after each alert using a
    classical formula (Montgomery, Introduction to statistical 
    quality control, 2004) on the out-of-control (OC) series.
    For each monte-carlo run, a series is resampled from the data with a 
    block bootstrap procedure and the CUSUM chart is applied until its 
    first alert. 
     
    Parameters
    ---------
    data : 2D-array
        OC dataset (rows: time, columns: OC series). 
        The array is never modified (the missing values are filled-up 
        on a copy when missing_values='fill').
    L_plus : float 
        Value for the positive control limit.
    L_minus : float, optional
        Value for the negative control limit. Default is None. 
        When None, L_minus = - L_plus. 
    delta : float, optional
        The target shift size. Default is 1.5.
    k : float, optional
        The allowance parameter.  The default is None.
        When None, k = |delta|/2 (optimal formula for iid normal data).
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000. 
    n : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 2000. 
    two_sided : bool, optional
        Flag to use two-sided CUSUM chart. Otherwise, the one-sided 
        upper CUSUM chart is used. Default is True.
    block_length :  int > 0, optional
        The length of the blocks. Default is None. 
        When None, the length is computed using an optimal formula. 
    missing_values : str, optional
        String that indicates how to deal with the missing values (MV). 
        The string value should be chosen among: 'omit', 'reset' and 'fill':
        'omit' removes the blocks containing MV ;
        'fill' fills-up the MV by the mean of each series ;
        'reset' resets the chart statistics at zero for gaps larger than
        a specified gap length (argument 'gap'). 
        The chart statistics is simply propagated through smaller gaps. 
        Default is 'omit'.
    gap :  int >= 0, optional
        The length of the gaps above which the chart statistics are reset,
        expressed in number of obs. Default is zero. 
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling 
       data. Values for the string should be selected among: 
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
       
    Returns
    --------
    shifts : 1D-array
        The estimated shift sizes (positive estimates first, followed by 
        the negative ones when two_sided is True).
        
    """
    assert np.ndim(data) == 2, "Input data must be a 2D array"

    if k is None:
        k = abs(delta) / 2
    if L_minus is None:
        L_minus = -L_plus
    (n_obs, n_series) = data.shape

    assert missing_values in ['fill', 'reset',
                              'omit'], "Undefined value for 'missing_values'"
    if missing_values == 'fill':
        data = np.array(data)  #copy: the array of the caller is left intact
        for i in range(n_series):
            #fill obs by the mean of the series
            data[np.isnan(data[:, i]), i] = np.nanmean(data[:, i])

    ##Block bootstrap
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    if BB_method != 'MABB':  #'MABB' resamples the data directly
        make_blocks = getattr(bb, BB_method)
        if missing_values == 'reset':
            #the MV are handled by the chart
            blocks = make_blocks(data, block_length, NaN=True)
        else:
            blocks = make_blocks(data, block_length)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    n = int(n)
    assert n > 0, "n must be strictly positive"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"

    shift_hat_plus = np.full(nmc, np.nan)
    shift_hat_minus = np.full(nmc, np.nan)
    for b in range(nmc):

        if BB_method == 'MABB':
            boot = bb.resample_MatchedBB(data, block_length, n=n)
        else:
            boot = resample(blocks, replace=True,
                            n_samples=n_blocks).flatten()[:n]
        boot = np.asarray(boot, dtype=float).ravel()

        shift_hat_plus[b] = _cusum_shift_estimate(boot, n, k, L_plus, gap)
        if two_sided:
            #the lower chart is the upper chart of the mirrored series
            shift_hat_minus[b] = -_cusum_shift_estimate(
                -boot, n, k, -L_minus, gap)

    shifts = shift_hat_plus[~np.isnan(shift_hat_plus)]
    if two_sided:
        shifts = np.concatenate(
            (shifts, shift_hat_minus[~np.isnan(shift_hat_minus)]))

    return shifts

Exemple #9

0

Afficher le fichier

def ARL_values(data,
               L_plus,
               L_minus=None,
               form='jump',
               delta=1.5,
               k=None,
               nmc=4000,
               n=8000,
               two_sided=True,
               missing_values='omit',
               gap=0,
               block_length=None,
               BB_method='MBB'):
    """ 
    Computes the in-control (IC) and out-of-control (OC) average run lengths 
    (ARL0 and ARL1) of the CUSUM chart.
    
    The algorithm works as follows.
    For each Monte-Carlo run, a new series of 'n' observations is sampled 
    from the IC data using a block bootstrap procedure. A shift of the 
    specified form and size is added to the second part of the series 
    (from index int(n/2) onwards). 
    The IC run length is the index of the first alarm raised before the 
    shift; the OC run length is the delay between the start of the shift and 
    the first alarm raised after it. 
    When no alarm is raised, the run length is set to the length of the 
    corresponding part of the series.
    Finally, the OC and IC average run lengths are calculated over the runs.
    
    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
        Note: when missing_values='fill', the missing values of 'data' are 
        replaced in place (the array passed by the caller is modified).
    L_plus : float 
        Value for the positive control limit.
    L_minus : float, optional
        Value for the negative control limit. Default is None. 
        When None, L_minus = - L_plus. 
    form :  str, optional
         String that represents the form of the shifts that are simulated. 
         The value of the string should be chosen among: 'jump', 'oscillation'
         or 'drift'.
         Default is 'jump'.
    delta : float, optional
        The target shift size. Default is 1.5.
    k : float, optional
        The allowance parameter (default is None). 
        When None, k = |delta|/2 (optimal formula for normal data).
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000. 
    n : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        The first int(n/2) observations are IC, the remaining ones are 
        shifted. Default is 8000. 
    two_sided : bool, optional
        Flag to use two-sided CUSUM chart. Otherwise, the one-sided 
        upper CUSUM chart is used. Default is True.
    missing_values : str, optional
        String that indicates how to deal with the missing values (MV). 
        The string value should be chosen among: 'omit', 'reset' and 'fill':
        'omit' removes the blocks containing MV ;
        'fill' fills-up the MV by the mean of each series ;
        'reset' resets the chart statistics at zero for gaps larger than
        a specified gap length (argument 'gap'). 
        The chart statistics is simply propagated through smaller gaps. 
        Default is 'omit'.
    gap :  int >= 0, optional
        The length of the gaps above which the chart statistics are reset,
        expressed in number of obs. Default is zero. 
    block_length :  int > 0, optional
        The length of the blocks. Default is None. 
        When None, the length is computed using an optimal formula. 
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling data. 
       Values for the string should be selected among: 
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
  
    Returns
    --------
    ARL1, ARL0: float
       The OC and IC average run lengths (ARL1 and ARL0) of the chart.
       For the two-sided chart, the upper and lower average run lengths are 
       combined as 1 / (1/ARL_minus + 1/ARL_plus).
       
    """
    assert np.ndim(data) == 2, "Input data must be a 2D array"
    n_series = data.shape[1]
    assert missing_values in ['fill', 'reset',
                              'omit'], "Undefined value for 'missing_values'"
    if missing_values == 'fill':
        for i in range(n_series):
            # fill obs by the mean of the series (in place)
            data[np.isnan(data[:, i]), i] = np.nanmean(data[:, i])

    ##Block bootstrap
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    # 'MABB' resamples directly from the data inside the Monte-Carlo loop;
    # the other methods first build the collection of blocks.
    if BB_method in ('MBB', 'NBB', 'CBB'):
        make_blocks = getattr(bb, BB_method)
        if missing_values == 'fill' or missing_values == 'omit':
            blocks = make_blocks(data, block_length)
        else:
            # 'reset': blocks are built with the NaN option enabled
            blocks = make_blocks(data, block_length, NaN=True)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    #chart parameters
    assert form in ['jump', 'drift', 'oscillation'], "Undefined shift form"
    shift = delta
    if k is None:
        k = abs(delta) / 2
    if L_minus is None:
        L_minus = -L_plus
    n = int(n)
    assert n > 0, "n must be strictly positive"
    n_shift = int(n / 2)  # index at which the simulated shift starts
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"

    # Time index of the shifted part. Its length must be n - n_shift (and
    # not n_shift): both differ by one when n is odd, which would otherwise
    # make the addition to boot[n_shift:] fail with a shape mismatch.
    steps = np.arange(n - n_shift)

    FP_minus = np.zeros((nmc, 1))  # 0 means "no false alarm yet"
    FP_plus = np.zeros((nmc, 1))
    RL1_plus = np.full((nmc, 1), np.nan)  # NaN means "no detection yet"
    RL1_minus = np.full((nmc, 1), np.nan)
    for b in range(nmc):

        if BB_method == 'MABB':
            boot = bb.resample_MatchedBB(data, block_length, n=n)
        else:
            boot = resample(blocks, replace=True,
                            n_samples=n_blocks).flatten()[:n]

        # simulate the shift on the second part of the series
        if form == 'oscillation':
            eta = np.random.uniform(0.02, 0.2)
            boot[n_shift:] = np.sin(
                eta * np.pi * steps) * shift + boot[n_shift:]
        elif form == 'drift':
            power = np.random.uniform(1.5, 2)
            boot[n_shift:] = shift / (500) * (steps**power) + boot[n_shift:]
        else:
            boot[n_shift:] = boot[n_shift:] + shift

        ### Upper chart ###
        # NOTE: the position of the 'C_plus[n_shift] = 0' statements (which
        # restart the chart at the shift time) is kept exactly as it was,
        # since it affects the statistics when obs are missing around
        # n_shift.
        cnt_plus = 0  # becomes 1 once the first false alarm is recorded
        cp = 0  # length of the current run of missing values
        C_plus = np.zeros((n, 1))
        for i in range(1, n):
            if not np.isnan(boot[i]):
                C_plus[i] = max(0, C_plus[i - 1] + boot[i] - k)
                C_plus[n_shift] = 0
                cp = 0
            elif cp < gap:
                # small gap: propagate the chart statistic
                C_plus[n_shift] = 0
                C_plus[i] = C_plus[i - 1]
                cp += 1
            else:
                # large gap: reset the chart statistic
                C_plus[i] = 0
            if C_plus[i] > L_plus and i < n_shift + 1 and cnt_plus == 0:
                FP_plus[b] = i  # first false positive
                cnt_plus += 1
            elif C_plus[i] > L_plus and i > n_shift:
                RL1_plus[b] = i - n_shift  # detection delay
                break

        ### Lower chart ###
        cnt_minus = 0
        cm = 0
        C_minus = np.zeros((n, 1))
        for j in range(1, n):
            if not np.isnan(boot[j]):
                C_minus[j] = min(0, C_minus[j - 1] + boot[j] + k)
                C_minus[n_shift] = 0
                cm = 0
            elif cm < gap:
                C_minus[n_shift] = 0
                C_minus[j] = C_minus[j - 1]
                cm += 1
            else:
                C_minus[j] = 0
            if C_minus[j] < L_minus and j < n_shift + 1 and cnt_minus == 0:
                FP_minus[b] = j  # first false positive
                cnt_minus += 1
            elif C_minus[j] < L_minus and j > n_shift:
                RL1_minus[b] = j - n_shift  # detection delay
                break

        # no detection: the run length is the length of the shifted part
        if np.isnan(RL1_plus[b]):
            RL1_plus[b] = n - n_shift
        if np.isnan(RL1_minus[b]):
            RL1_minus[b] = n - n_shift

        # no false alarm: the run length is the length of the IC part
        if FP_minus[b] == 0:
            FP_minus[b] = n_shift
        if FP_plus[b] == 0:
            FP_plus[b] = n_shift

    if two_sided:
        ARL1 = (1 / (np.nanmean(RL1_minus)) + 1 / (np.nanmean(RL1_plus)))**(-1)
        ARL0 = (1 / (np.nanmean(FP_minus)) + 1 / (np.nanmean(FP_plus)))**(-1)
    else:
        ARL1 = np.mean(RL1_plus)
        ARL0 = np.nanmean(FP_plus)

    return (ARL1, ARL0)

Exemple #10

0

Afficher le fichier

def ARL0_CUSUM(data,
               L_plus,
               L_minus=None,
               delta=1.5,
               k=None,
               nmc=4000,
               n=4000,
               two_sided=True,
               missing_values='omit',
               gap=0,
               block_length=None,
               BB_method='MBB'):
    """ 
    Estimates the in-control (IC) average run length (ARL0) of the CUSUM 
    chart, in presence of missing values.
    
    For each of the 'nmc' Monte-Carlo runs, a series of 'n' observations is 
    resampled from the IC data with a block bootstrap procedure and the 
    upper and lower CUSUM statistics are monitored until their first alarm. 
    A run that raises no alarm gets a run length equal to 'n'. 
    The run lengths are finally averaged over the runs.
    
    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
        With missing_values='fill', the missing values are replaced in 
        place, i.e. the array of the caller is modified.
    L_plus : float 
        Value for the positive control limit.
    L_minus : float, optional
        Value for the negative control limit. Default is None. 
        When None, L_minus = - L_plus. 
    delta : float, optional
        The target shift size. Default is 1.5. 
    k : float, optional
        The allowance parameter. The default is None.
        When None, k = |delta|/2 (optimal formula for iid normal data).
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000. 
    n : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 4000. 
    two_sided : bool, optional
        Flag to use two-sided CUSUM chart. Otherwise, the one-sided 
        upper CUSUM chart is used. Default is True.
    missing_values : str, optional
        String that indicates how to deal with the missing values (MV). 
        The string value should be chosen among: 'omit', 'reset' and 'fill':
        'omit' removes the blocks containing MV ;
        'fill' fills-up the MV by the mean of each series ;
        'reset' resets the chart statistics at zero for gaps larger than
        a specified gap length (argument 'gap'). 
        The chart statistics is simply propagated through smaller gaps. 
        Default is 'omit'.
    gap :  int >= 0, optional
        The length of the gaps above which the chart statistics are reset,
        expressed in number of obs. Default is zero. 
    block_length :  int > 0, optional
        The length of the blocks. Default is None. 
        When None, the length is computed using an optimal formula. 
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling data. 
       Values for the string should be selected among: 
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
    
    Returns
    -------
    ARL :  float          
        The IC average run length (ARL0). For the two-sided chart, the upper 
        and lower average run lengths are combined as 
        1 / (1/ARL_minus + 1/ARL_plus).
        
    """
    assert np.ndim(data) == 2, "Input data must be a 2D array"
    n_series = data.shape[1]
    assert missing_values in ('fill', 'reset', 'omit'), \
        "Undefined value for 'missing_values'"
    if missing_values == 'fill':
        # replace the missing obs of each series by the mean of that series
        for col in range(n_series):
            column = data[:, col]
            data[np.isnan(column), col] = np.nanmean(column)

    ##Block bootstrap
    assert BB_method in ('MBB', 'NBB', 'CBB', 'MABB'), \
        "Undefined block bootstrap procedure"
    # The matched block bootstrap ('MABB') draws its samples directly from
    # the data; the three other methods need the blocks beforehand.
    if BB_method in ('MBB', 'NBB', 'CBB'):
        build_blocks = getattr(bb, BB_method)
        if missing_values in ('fill', 'omit'):
            blocks = build_blocks(data, block_length)
        else:
            blocks = build_blocks(data, block_length, NaN=True)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    #chart parameters
    if k is None:
        k = abs(delta) / 2
    if L_minus is None:
        L_minus = -L_plus
    n = int(n)
    assert n > 0, "n must be strictly positive"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"

    def run_length(series, update, alarm):
        """Index of the first alarm on 'series' (n when there is none)."""
        stat = np.zeros((n, 1))
        missing_run = 0  # number of consecutive missing obs carried so far
        for t in range(1, n):
            if np.isnan(series[t]):
                if missing_run < gap:
                    # small gap: the statistic is propagated
                    stat[t] = stat[t - 1]
                    missing_run += 1
                else:
                    # large gap: the statistic is reset
                    stat[t] = 0
            else:
                stat[t] = update(stat[t - 1], series[t])
                missing_run = 0
            if alarm(stat[t]):
                return t
        return n

    def upper_update(previous, obs):
        return max(0, previous + obs - k)

    def lower_update(previous, obs):
        return min(0, previous + obs + k)

    RL_plus = np.empty((nmc, 1))
    RL_minus = np.empty((nmc, 1))
    for run in range(nmc):

        if BB_method == 'MABB':
            boot = bb.resample_MatchedBB(data, block_length, n=n)
        else:
            boot = resample(blocks, replace=True,
                            n_samples=n_blocks).flatten()[:n]

        ### Monitoring ###
        RL_plus[run] = run_length(boot, upper_update,
                                  lambda stat: stat > L_plus)
        RL_minus[run] = run_length(boot, lower_update,
                                   lambda stat: stat < L_minus)

    if not two_sided:
        return np.mean(RL_plus)
    return (1 / (np.mean(RL_minus)) + 1 / (np.mean(RL_plus)))**(-1)

Exemple #11

0

Afficher le fichier

def limit_CUSUM(data,
                delta=1.5,
                k=None,
                ARL0_threshold=200,
                rho=2,
                L_plus=20,
                L_minus=0,
                nmc=4000,
                n=4000,
                two_sided=True,
                verbose=True,
                missing_values='omit',
                gap=0,
                block_length=None,
                BB_method='MBB'):
    """ 
    Computes the control limit of the CUSUM chart in presence of missing values.
    
    The control limit of the chart is adjusted by a bisection search as follows.
    Starting from the middle of the interval [L_minus, L_plus], the actual 
    IC average run length (ARL0) is computed on 'nmc' processes that are 
    sampled with repetition from the IC data by the block bootstrap procedure.
    If the actual ARL0 is inferior (resp. superior) to the pre-specified ARL0, 
    the control limit of the chart is increased (resp. decreased).
    This algorithm is iterated until the actual ARL0 reaches the pre-specified 
    ARL0 at the desired accuracy.
    
    If the search interval collapses (the limit cannot change anymore) before 
    the desired accuracy is reached, a RuntimeWarning is issued and the 
    current limit is returned instead of iterating forever. This happens 
    for instance when the pre-specified ARL0 cannot be attained inside 
    [L_minus, L_plus], or when ARL0_threshold is larger than 'n' (the run 
    lengths are capped at 'n').
    
    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
        Note: when missing_values='fill', the missing values of 'data' are 
        replaced in place (the array passed by the caller is modified).
    delta : float, optional
        The target shift size. Default is 1.5.
    k : float, optional
        The allowance parameter.  The default is None.
        When None, k = |delta|/2 (optimal formula for iid normal data).
    ARL0_threshold : int > 0, optional
        Pre-specified value for the IC average run length (ARL0). 
        This value is inversely proportional to the rate of false positives.
        Typical values are 100, 200 or 500. Default is 200.
    rho : float > 0, optional
        Accuracy to reach the pre-specified value for ARL0: 
        the algorithm stops when |ARL0-ARL0_threshold| <= rho.
        The default is 2.
    L_plus : float, optional
        Upper bound of the interval in which the positive control limit is 
        searched. Default is 20.
    L_minus : float, optional
        Lower bound of the interval in which the positive control limit is 
        searched. Default is 0. 
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000. 
    n : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 4000. 
    two_sided : bool, optional
        Flag to use two-sided CUSUM chart. Otherwise, the one-sided 
        upper CUSUM chart is used. Default is True.
    verbose : bool, optional
        Flag to print intermediate results (the actual ARL0 and the updated 
        limit at each iteration). Default is True.
    missing_values : str, optional
        String that indicates how to deal with the missing values (MV). 
        The string value should be chosen among: 'omit', 'reset' and 'fill':
        'omit' removes the blocks containing MV ;
        'fill' fills-up the MV by the mean of each series ;
        'reset' resets the chart statistics at zero for gaps larger than
        a specified gap length (argument 'gap'). 
        The chart statistics is simply propagated through smaller gaps. 
        Default is 'omit'.
    gap :  int >= 0, optional
        The length of the gaps above which the chart statistics are reset,
        expressed in number of obs. Default is zero. 
    block_length :  int > 0, optional
        The length of the blocks. Default is None. 
        When None, the length is computed using an optimal formula. 
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling data. 
       Values for the string should be selected among: 
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
    
    Returns
    ------
    L : float
       The positive control limit of the chart (with this algorithm,
       it has the same value as the negative control limit, with opposite sign). 
       
    """
    import warnings

    assert np.ndim(data) == 2, "Input data must be a 2D array"
    n_series = data.shape[1]
    assert missing_values in ['fill', 'reset',
                              'omit'], "Undefined value for 'missing_values'"
    if missing_values == 'fill':
        for i in range(n_series):
            # fill obs by the mean of the series (in place)
            data[np.isnan(data[:, i]), i] = np.nanmean(data[:, i])

    ##Block bootstrap
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    # 'MABB' resamples directly from the data inside the Monte-Carlo loop;
    # the other methods first build the collection of blocks.
    if BB_method in ('MBB', 'NBB', 'CBB'):
        make_blocks = getattr(bb, BB_method)
        if missing_values == 'fill' or missing_values == 'omit':
            blocks = make_blocks(data, block_length)
        else:
            # 'reset': blocks are built with the NaN option enabled
            blocks = make_blocks(data, block_length, NaN=True)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    #chart parameters
    if k is None:
        k = abs(delta) / 2
    assert L_plus > L_minus, "L_plus should be superior than L_minus"
    L = (L_plus + L_minus) / 2
    n = int(n)
    assert n > 0, "n must be strictly positive"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"
    assert rho > 0, "rho must be strictly positive"
    assert ARL0_threshold > 0, "ARL0_threshold must be strictly positive"

    ARL = 0
    while (np.abs(ARL - ARL0_threshold) > rho):
        # runs without alarm keep a run length equal to n
        RL_minus = np.full((nmc, 1), float(n))
        RL_plus = np.full((nmc, 1), float(n))
        for j in range(nmc):

            if BB_method == 'MABB':
                boot = bb.resample_MatchedBB(data, block_length, n=n)
            else:
                boot = resample(blocks, replace=True,
                                n_samples=n_blocks).flatten()[:n]

            ### Monitoring ###
            C_plus = np.zeros((n, 1))
            cp = 0  # length of the current run of missing values
            for i in range(1, n):
                if not np.isnan(boot[i]):
                    C_plus[i] = max(0, C_plus[i - 1] + boot[i] - k)
                    cp = 0
                elif cp < gap:
                    # small gap: propagate the chart statistic
                    C_plus[i] = C_plus[i - 1]
                    cp += 1
                else:
                    # large gap: reset the chart statistic
                    C_plus[i] = 0
                if C_plus[i] > L:
                    RL_plus[j] = i
                    break

            C_minus = np.zeros((n, 1))
            cm = 0
            for i in range(1, n):
                if not np.isnan(boot[i]):
                    C_minus[i] = min(0, C_minus[i - 1] + boot[i] + k)
                    cm = 0
                elif cm < gap:
                    C_minus[i] = C_minus[i - 1]
                    cm += 1
                else:
                    C_minus[i] = 0
                if C_minus[i] < -L:
                    RL_minus[j] = i
                    break

        if two_sided:
            ARL = (1 / (np.mean(RL_minus)) + 1 / (np.mean(RL_plus)))**(-1)
        else:
            ARL = np.mean(RL_plus)

        # bisection step on the search interval
        if ARL < ARL0_threshold:
            L_minus = (L_minus + L_plus) / 2
        elif ARL > ARL0_threshold:
            L_plus = (L_minus + L_plus) / 2
        previous_L = L
        L = (L_plus + L_minus) / 2

        if verbose:
            print(ARL)
            print(L)

        # Once the midpoint does not move anymore, the interval is exhausted
        # and L stays the same for all subsequent iterations: stop here
        # rather than resampling forever.
        if L == previous_L and np.abs(ARL - ARL0_threshold) > rho:
            warnings.warn(
                "limit_CUSUM: the search interval collapsed before the "
                "pre-specified ARL0 was reached at the desired accuracy "
                "(last ARL0 = %s, limit = %s). Consider widening "
                "[L_minus, L_plus] or increasing n." % (ARL, L),
                RuntimeWarning,
                stacklevel=2)
            break

    return L