Beispiel #1
0
def input_vector_length(data,
                        delta_min,
                        L_plus,
                        L_minus=None,
                        k=None,
                        nmc=4000,
                        n=2000,
                        qt=None,
                        block_length=None,
                        BB_method='MBB',
                        plot=False):
    """
    Computes the length of the input vector for the support vector machine
    procedures (svms).

    The length of the input vector represents the number of past observations
    that are fed to the svms after each alert. The regressor and classifier
    then predict the form and the size of the shift that causes the alert
    based on the input vector.
    Intuitively, the length should be sufficiently large to ensure that most
    of the shifts are contained within the input vector while maintaining
    the computing efficiency of the method. This is usually
    not a problem for the large shifts that are quickly detected by the chart.
    However the smallest shifts may be detected only after a certain amount
    of time and therefore require larger vectors.
    Hence, the length is selected as an upper quantile of the run length
    distribution, computed on data shifted by the smallest shift size
    that we aim to detect.

    It is implemented as follows.
    For each Monte-Carlo run, a new series of observations is sampled from the
    IC data using a block bootstrap procedure. Then, a jump of size
    'delta_min' is simulated on top of the sample.
    The run lengths of the upper and lower CUSUM charts are evaluated (a run
    without alert is assigned the length n) and combined as
    1 / (1/RL_minus + 1/RL_plus). The length of the input
    vector is finally selected as a specified quantile of this run length
    distribution. If the quantile is unspecified, a quantile
    is selected by locating the 'knee' of the quantile curve, evaluated on
    the grid 1, 0.95, ..., 0.55.

    Parameters
    ----------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
    delta_min : float >= 0
        The target minimum shift size.
    L_plus : float
        Value for the positive control limit.
    L_minus : float, optional
        Value for the negative control limit. Default is None.
        When None, L_minus = - L_plus.
    k : float, optional
        The allowance parameter. The default is None.
        When None, k = delta_min/2 (optimal formula for iid normal data).
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000.
    n : int >= 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 2000.
    qt :  float in [0,1], optional
        Quantile of the run length distribution (used to select an appropriate
        input vector length). Default is None.
        When None, the appropriate quantile is selected with a knee locator.
    block_length :  int > 0, optional
        The length of the blocks. Default is None.
        When None, the length is computed using an optimal formula.
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling
       data. Values for the string should be selected among:
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
    plot :  bool, optional
        Flag to show the histogram of the run length distribution (and the
        quantile curve when the knee locator is used).
        Default is False.

    Returns
    -------
    m : int
        The length of the input vector.

    Raises
    ------
    ValueError
        If qt is None and no knee can be located on the quantile curve.

    """
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    n = int(n)
    assert n >= 0, "n must be superior or equal to zero"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"

    if BB_method != 'MABB':
        # The blocks are built once; each run only resamples among them.
        # (The matched block bootstrap resamples directly from the data.)
        blocks = getattr(bb, BB_method)(data, block_length)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    if k is None:
        k = delta_min / 2
    if L_minus is None:
        L_minus = -L_plus

    # Run lengths default to n, i.e. "no alert within the series".
    RL1_plus = np.full(nmc, float(n))
    RL1_minus = np.full(nmc, float(n))
    for b in range(nmc):

        # sample data with BB and shift them by delta_min
        if BB_method == 'MABB':
            boot = bb.resample_MatchedBB(data, block_length, n=n)
        else:
            boot = resample(blocks, replace=True,
                            n_samples=n_blocks).flatten()[:n]
        boot = boot + delta_min

        # Only the previous CUSUM value is needed, hence scalar accumulators.
        # Both charts start at index 1 with an initial statistic of zero.
        c_plus = 0.0
        for i in range(1, n):
            c_plus = max(0, c_plus + boot[i] - k)
            if c_plus > L_plus:
                RL1_plus[b] = i
                break

        c_minus = 0.0
        for j in range(1, n):
            c_minus = min(0, c_minus + boot[j] + k)
            if c_minus < L_minus:
                RL1_minus[b] = j
                break

    RL = (1 / (RL1_minus) + 1 / (RL1_plus))**(-1)
    RL = RL[~np.isnan(RL)]

    if plot:
        plt.figure(1)
        plt.hist(RL,
                 range=[-4, 100],
                 bins='auto',
                 density=True,
                 facecolor='b')
        plt.title("Run length distribution")
        plt.axis([-4, 100, 0, 0.2])
        plt.grid(True)
        plt.show()

    if qt is not None:
        ### select m with a specified quantile
        return int(np.quantile(RL, qt))

    ### select m with knee locator
    x = np.arange(1, 0.5, -0.05)
    y = np.quantile(RL, x)
    if plot:
        plt.plot(x, y)
        plt.xlabel('quantile')
        plt.ylabel('run length')
        plt.title('Run length at different quantiles')
        plt.show()

    # The shape of the curve is read from the leading coefficients of a
    # linear fit (sign of the slope) and a quadratic fit (sign of curvature).
    slope = np.polyfit(x, y, deg=1)[0]
    curvature = np.polyfit(x, y, deg=2)[0]
    curve = 'concave' if curvature < 0 else 'convex'
    direction = 'decreasing' if slope < 0 else 'increasing'  # slope < 0: decreasing
    knee = KneeLocator(x, y, curve=curve, direction=direction).knee
    if knee is None:
        # Passing None to np.quantile would fail with an obscure error.
        raise ValueError("No knee could be located on the quantile curve; "
                         "please specify the quantile 'qt' explicitly")

    return int(np.quantile(RL, knee))
Beispiel #2
0
def choice_C(data,
             L_plus,
             delta_min,
             wdw_length,
             scale,
             start=1,
             stop=10,
             step=1,
             delay=0,
             L_minus=None,
             k=None,
             n=36000,
             n_series=500,
             epsilon=0.001,
             block_length=None,
             BB_method='MBB',
             confusion=False,
             verbose=True):
    """
    Selects an appropriate value for the regularization parameter (C) of the
    svm procedures.

    The procedure is implemented as follows.
    For each value of C, the regressor and classifier are trained and validated.
    Then, the values of C that maximize/minimize different performance
    measures are returned.
    The training (and validating) procedure works as explained below.
    For each group of three instances, a new series of observations is sampled
    from the IC data using a block bootstrap procedure.
    A shift size is then sampled from a halfnormal distribution (supported by
    [delta_min, +inf]) with a specified scale parameter; its sign alternates
    from one group to the next.
    Three artificial deviations of that size are created from the series:
    a jump (added from index wdw_length onwards), a drift (a power-law trend
    with random power in [1.5, 2] added to the series) and an oscillating
    shift (the series is multiplied by a sinusoid of random frequency).
    A two-sided CUSUM chart is run on each deviation and the wdw_length
    observations ending at the first alert form the input vector
    (observations wdw_length to 2*wdw_length - 1 when there is no alert).
    The classifier is then trained to recognize the form of deviations among
    the three general classes: 'jump', 'drift' or 'oscillation' whereas
    the regressor learns to predict the shift sizes in a continuous range.

    Once the learning is finished, a validation step is also applied on
    unseen deviations to evaluate the performances of the svr and svc.
    Three criteria are computed: the mean absolute percentage
    error (MAPE), the mean squared error (MSE) and the accuracy.

    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
    L_plus : float
        Value for the positive control limit.
    delta_min : float > 0
        The target minimum shift size.
    wdw_length : int > 0
        The length of the input vector.
    scale : float > 0
         The scale parameter of the halfnormal distribution
         (analogous to the standard deviation of a normal distribution).
         A typical range of values for scale is [1,4], depending on the size
         of the actual deviations
    start : float > 0, optional
        Starting value for C. Default is 1.
    stop : float > 0, optional
        Stopping value for C (excluded). Default is 10.
    step : float > 0, optional
        Step value for C. The function tests the values of C returned by
        np.arange(start, stop, step), i.e. the half-open range [start, stop).
        Default is 1.
    delay : int, optional
        When positive, the chart starts after a delay randomly selected
        among the integers [0, delay - 1]. Default is 0 (no delay).
    L_minus :  float, optional
        Value for the negative control limit. Default is None.
        When None, L_minus = - L_plus.
    k : float, optional
        The allowance parameter. The default is None.
        When None, k = delta_min/2 (optimal formula for iid normal data).
    n : int > 0, optional
        Number of training and validating instances. This value is
        typically large. Default is 36000.
        n is rounded up to a multiple of 3; one fifth of the instances is
        used for validation. Only complete groups of three instances are
        kept, so the two set sizes are rounded down to multiples of 3.
    n_series : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 500.
    epsilon : float, optional
        Parameter of the svr, which represents the approximation accuracy.
        Default is 0.001.
    block_length :  int > 0, optional
        The length of the blocks. Default is None.
        When None, the length is computed using an optimal formula.
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling
       data. Values for the string should be selected among:
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
    confusion : bool, optional
        Flag to show the confusion matrix (measure of the classification
        accuracy, class by class). Default is False.
    verbose : bool, optional
        Flag to print infos about C. Default is True.

    Returns
    ------
    min_MAPE : float > 0
        The value of C that minimizes the MAPE (mean absolute percentage error).
    min_MSE : float > 0
        The value of C that minimizes the MSE (mean squared error).
    max_accuracy : float > 0
        The value of C that maximizes the accuracy.

    Raises
    ------
    ValueError
        If the grid of C values is empty or if n is too small to build a
        validation set (n < 15).

    """
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    if BB_method != 'MABB':
        # The blocks are built once and resampled for every new series.
        blocks = getattr(bb, BB_method)(data, block_length)
        n_blocks = int(np.ceil(n_series / blocks.shape[1]))

    wdw_length = int(np.ceil(wdw_length))  #should be integer

    delay = int(delay)
    n = int(n)
    assert n > 0, "n must be strictly positive"
    n += (-n) % 3  #n should be multiple of 3

    if L_minus is None:
        L_minus = -L_plus
    if k is None:
        k = delta_min / 2

    # Instances are generated three at a time (jump, drift, oscillation).
    # Incomplete trailing groups would stay zero-filled (zero input, zero
    # size) and corrupt the fit and the MAPE, so they are dropped.
    n_test = int(n / 5)  #n testing instances
    n_train = n - n_test  #n training instances
    n_test -= n_test % 3
    n_train -= n_train % 3
    if n_test == 0:
        raise ValueError("n is too small: at least 15 instances are needed "
                         "to build a validation set")

    # The grid itself defines the number of scores; computing that number
    # separately may disagree with np.arange for non-integer steps.
    C_values = np.arange(start, stop, step)
    if len(C_values) == 0:
        raise ValueError("No value of C to test: check 'start', 'stop' "
                         "and 'step'")

    time = np.arange(n_series)

    def draw_series():
        """Sample one IC series of length n_series by block bootstrap."""
        if BB_method == 'MABB':
            return bb.resample_MatchedBB(data, block_length, n=n_series)
        return resample(blocks, replace=True,
                        n_samples=n_blocks).flatten()[:n_series]

    def window_at_first_alert(boot, first):
        """Return the wdw_length values ending at the first CUSUM alert."""
        c_plus = c_minus = 0.0
        # Python floats make this hot loop cheaper than numpy scalars.
        for t, x in enumerate(boot[first:n_series].tolist(), start=first):
            c_plus = max(0, c_plus + x - k)
            c_minus = min(0, c_minus + x + k)
            # The earliest alert of either chart wins.
            if c_plus > L_plus or c_minus < L_minus:
                return boot[t + 1 - wdw_length:t + 1]
        return boot[wdw_length:wdw_length * 2]  #default is no alert

    def simulate(n_inst, sign):
        """Build n_inst labelled input vectors; also return the next sign."""
        inputs = np.zeros((n_inst, wdw_length))
        sizes = np.zeros(n_inst)
        forms = np.zeros(n_inst)
        magnitudes = halfnorm(scale=scale).rvs(
            size=n_inst) + delta_min  #size of shifts
        delay_rnd = 0
        for row in range(0, n_inst, 3):
            shift = magnitudes[row] * sign
            series = draw_series()

            #simulate a random delay
            if delay > 0:
                delay_rnd = np.random.randint(delay)

            for form in range(3):
                boot = np.copy(series)
                if form == 0:  #jump
                    boot[wdw_length:] = boot[wdw_length:] + shift
                elif form == 1:  #drift
                    power = np.random.uniform(1.5, 2)
                    boot = shift / n_series * time**power + boot
                else:  #oscillating shift (multiplies the series)
                    eta = np.random.uniform(np.pi / wdw_length,
                                            3 * np.pi / wdw_length)
                    boot = np.sin(eta * np.pi * time) * shift * boot

                #start the monitoring after the random delay
                inputs[row + form, :] = window_at_first_alert(
                    boot, wdw_length + delay_rnd)
                sizes[row + form] = shift
                forms[row + form] = form
            sign = -sign
        return inputs, sizes, forms, sign

    MAPE = np.zeros(len(C_values))
    MSE = np.zeros(len(C_values))
    accuracy = np.zeros(len(C_values))
    sign = 1
    for count, C in enumerate(C_values):

        ### training
        input_train, size_train, form_train, sign = simulate(n_train, sign)

        ### train the models
        regressor = SVR(C=C, epsilon=epsilon)
        regressor.fit(input_train, size_train)
        clf = svm.SVC(C=C)
        clf.fit(input_train, form_train)

        ### testing
        input_test, label_test, form_test, sign = simulate(n_test, sign)

        ### compute accuracy and other precision measures
        label_pred = regressor.predict(input_test)
        label_pred_clf = clf.predict(input_test)

        #regressor (the MAPE compares the sizes without their signs)
        abs_test = np.abs(label_test)
        MAPE[count] = np.mean(
            np.abs((abs_test - np.abs(label_pred)) / abs_test)) * 100
        MSE[count] = np.mean((label_test - label_pred)**2)
        #classifier
        accuracy[count] = np.mean(label_pred_clf == form_test) * 100

        ### compute the confusion matrix
        if confusion:
            # NOTE(review): plot_confusion_matrix was removed in
            # scikit-learn 1.2; ConfusionMatrixDisplay.from_estimator is its
            # replacement. Kept as is to match the file's imports.
            class_names = ['jump', 'drift', 'oscill.']
            titles_options = [("Confusion matrix, without normalization",
                               None), ("Normalized confusion matrix", 'true')]
            for title, normalize in titles_options:
                disp = plot_confusion_matrix(clf,
                                             input_test,
                                             form_test,
                                             display_labels=class_names,
                                             cmap=plt.cm.Blues,
                                             normalize=normalize)
                disp.ax_.set_title(title)
                print(title)
                print(disp.confusion_matrix)
            plt.show()

    min_MAPE = C_values[np.argmin(MAPE)]
    min_MSE = C_values[np.argmin(MSE)]
    max_accuracy = C_values[np.argmax(accuracy)]

    if verbose:
        print('C value that minimizes the MAPE:', min_MAPE)
        print('C value that minimizes the MSE:', min_MSE)
        print('C value that maximizes the accuracy:', max_accuracy)

    return min_MAPE, min_MSE, max_accuracy
Beispiel #3
0
def training_svm(data,
                 L_plus,
                 delta_min,
                 wdw_length,
                 scale,
                 delay=0,
                 L_minus=None,
                 k=None,
                 n=63000,
                 n_series=500,
                 C=1.0,
                 epsilon=0.001,
                 kernel='rbf',
                 degree=3,
                 block_length=None,
                 BB_method='MBB',
                 precision=True,
                 confusion=True):
    """
    Trains the support vector machine classifier (svc) and regressor (svr).

    The training (and validating) procedure works as follows.
    For each group of three instances, a new series of observations is sampled
    from the IC data using a block bootstrap procedure.
    A shift size is then sampled from a halfnormal distribution (supported by
    [delta_min, +inf]) with a specified scale parameter; its sign alternates
    from one group to the next.
    Three artificial deviations of that size are created from the series:
    a jump (added from index wdw_length onwards), a drift (a power-law trend
    with random power in [1.5, 2] added to the series) and an oscillating
    shift (the series is multiplied by a sinusoid whose random parameter
    lies in [pi/wdw_length, 3*pi/wdw_length]).
    A two-sided CUSUM chart is run on each deviation and the wdw_length
    observations ending at the first alert form the input vector
    (observations wdw_length to 2*wdw_length - 1 when there is no alert).
    The classifier is then trained to recognize the form of deviations among
    the three general classes: 'jump', 'drift' or 'oscillation' whereas
    the regressor learns to predict the shift sizes in a
    continuous range.
    Once the learning is finished, a validation step is also applied on
    unseen deviations to evaluate the performances of the svr and svc.
    Three criteria are computed: the mean absolute percentage error
    (MAPE), the mean squared error (MSE) and the accuracy.

    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series).
    L_plus : float
        Value for the positive control limit.
    delta_min : float > 0
        The target minimum shift size.
    wdw_length : int > 0
        The length of the input vector.
    scale : float > 0
         The scale parameter of the halfnormal distribution
         (analogous to the standard deviation of a normal distribution).
         A typical range of values for scale is [1,4], depending on the size
         of the actual deviations
    delay : int, optional
        When positive, the chart starts after a delay randomly selected
        among the integers [0, delay - 1]. Default is 0 (no delay).
    L_minus :  float, optional
        Value for the negative control limit. Default is None.
        When None, L_minus = - L_plus.
    k : float, optional
        The allowance parameter. The default is None.
        When None, k = delta_min/2 (optimal formula for iid normal data).
    n : int > 0, optional
        Number of training and validating instances. This value is
        typically large. Default is 63000.
        n is rounded up to a multiple of 3; one fifth of the instances is
        used for validation. Only complete groups of three instances are
        kept, so the two set sizes are rounded down to multiples of 3.
    n_series : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 500.
    C : float > 0, optional
        Regularization parameter of the svr and svc (the strength of the
        regularization is inversely proportional to C).
        Default is 1. Typical range is [1, 10].
    epsilon : float, optional
        Parameter of the svr, which represents the approximation accuracy.
        Default is 0.001.
    kernel : str, optional
        The kernel function to be used in the svm procedures.
        Values should be selected among: 'rbf', 'linear', 'sigmoid' and 'poly'.
        Default is 'rbf'.
    degree : int > 0, optional
        The degree of the polynomial kernel. Only used when kernel='poly'.
        Default is 3.
    block_length :  int > 0, optional
        The length of the blocks. Default is None.
        When None, the length is computed using an optimal formula.
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling
       data. Values for the string should be selected among:
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.
    precision : bool, optional
        Flag to print accuracy measures. Default is True.
    confusion : bool, optional
        Flag to show the confusion matrix (measure of the classification
        accuracy, class by class). Default is True.

    Returns
    ------
    regressor : support vector regression model
        The trained regressor.
    clf : support vector classification model
        The trained classifier.

    Raises
    ------
    ValueError
        If n is too small to build a validation set (n < 15).

    """
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    if BB_method != 'MABB':
        # The blocks are built once and resampled for every new series.
        blocks = getattr(bb, BB_method)(data, block_length)
        n_blocks = int(np.ceil(n_series / blocks.shape[1]))

    wdw_length = int(np.ceil(wdw_length))  #should be integer

    delay = int(delay)
    n = int(n)
    assert n > 0, "n must be strictly positive"
    n += (-n) % 3  #n should be multiple of 3

    if L_minus is None:
        L_minus = -L_plus
    if k is None:
        k = delta_min / 2

    assert degree > 0, "degree must be strictly positive"
    degree = int(degree)

    # Instances are generated three at a time (jump, drift, oscillation).
    # Incomplete trailing groups would stay zero-filled (zero input, zero
    # size) and corrupt the fit and the MAPE, so they are dropped.
    n_test = int(n / 5)  #n testing instances
    n_train = n - n_test  #n training instances
    n_test -= n_test % 3
    n_train -= n_train % 3
    if n_test == 0:
        raise ValueError("n is too small: at least 15 instances are needed "
                         "to build a validation set")

    time = np.arange(n_series)

    def draw_series():
        """Sample one IC series of length n_series by block bootstrap."""
        if BB_method == 'MABB':
            return bb.resample_MatchedBB(data, block_length, n=n_series)
        return resample(blocks, replace=True,
                        n_samples=n_blocks).flatten()[:n_series]

    def window_at_first_alert(boot, first):
        """Return the wdw_length values ending at the first CUSUM alert."""
        c_plus = c_minus = 0.0
        # Python floats make this hot loop cheaper than numpy scalars.
        for t, x in enumerate(boot[first:n_series].tolist(), start=first):
            c_plus = max(0, c_plus + x - k)
            c_minus = min(0, c_minus + x + k)
            # The earliest alert of either chart wins.
            if c_plus > L_plus or c_minus < L_minus:
                return boot[t + 1 - wdw_length:t + 1]
        return boot[wdw_length:wdw_length * 2]  #default is no alert

    def simulate(n_inst, sign):
        """Build n_inst labelled input vectors; also return the next sign."""
        inputs = np.zeros((n_inst, wdw_length))
        sizes = np.zeros(n_inst)
        forms = np.zeros(n_inst)
        magnitudes = halfnorm(scale=scale).rvs(
            size=n_inst) + delta_min  #size of shifts
        delay_rnd = 0
        for row in range(0, n_inst, 3):
            shift = magnitudes[row] * sign
            series = draw_series()

            #simulate a random delay
            if delay > 0:
                delay_rnd = np.random.randint(delay)

            for form in range(3):
                boot = np.copy(series)
                if form == 0:  #jump
                    boot[wdw_length:] = boot[wdw_length:] + shift
                elif form == 1:  #drift
                    power = np.random.uniform(1.5, 2)
                    boot = shift / n_series * time**power + boot
                else:  #oscillating shift (multiplies the series)
                    eta = np.random.uniform(np.pi / wdw_length,
                                            3 * np.pi / wdw_length)
                    boot = np.sin(eta * np.pi * time) * shift * boot

                #start the monitoring after the random delay
                inputs[row + form, :] = window_at_first_alert(
                    boot, wdw_length + delay_rnd)
                sizes[row + form] = shift
                forms[row + form] = form
            sign = -sign
        return inputs, sizes, forms, sign

    ### training
    input_train, size_train, form_train, sign = simulate(n_train, 1)

    ### train the models
    regressor = SVR(C=C, epsilon=epsilon, kernel=kernel, degree=degree)
    regressor.fit(input_train, size_train)
    clf = svm.SVC(C=C, kernel=kernel, degree=degree)
    clf.fit(input_train, form_train)

    ### testing (the sign sequence continues where the training stopped)
    input_test, label_test, form_test, sign = simulate(n_test, sign)

    ### compute accuracy and other precision measures
    label_pred = regressor.predict(input_test)
    label_pred_clf = clf.predict(input_test)

    if precision:
        #regressor, signed sizes
        MAPE = np.mean(np.abs((label_test - label_pred) / label_test)) * 100
        MSE = np.mean((label_test - label_pred)**2)
        print('MAPE =', MAPE)
        print('MSE =', MSE)

        #regressor, magnitudes only
        abs_pred = np.abs(label_pred)
        abs_test = np.abs(label_test)
        MAPE = np.mean(np.abs((abs_test - abs_pred) / abs_test)) * 100
        MSE = np.mean((abs_test - abs_pred)**2)
        print('MAPE without signs =', MAPE)
        print('MSE without signs =', MSE)

        #classifier
        accuracy = np.mean(label_pred_clf == form_test) * 100
        print('Accuracy =', accuracy)

    ### compute the confusion matrix
    if confusion:
        # NOTE(review): plot_confusion_matrix was removed in
        # scikit-learn 1.2; ConfusionMatrixDisplay.from_estimator is its
        # replacement. Kept as is to match the file's imports.
        class_names = ['jump', 'drift', 'oscill.']
        titles_options = [("Confusion matrix, without normalization", None),
                          ("Normalized confusion matrix", 'true')]
        for title, normalize in titles_options:
            disp = plot_confusion_matrix(clf,
                                         input_test,
                                         form_test,
                                         display_labels=class_names,
                                         cmap=plt.cm.Blues,
                                         normalize=normalize)
            disp.ax_.set_title(title)
            print(title)
            print(disp.confusion_matrix)
            # Entry [2, 1] (true 'oscill.', predicted 'drift') over the
            # number of test instances.
            print(disp.confusion_matrix[2, 1] / n_test)
        plt.show()

    return (regressor, clf)
Beispiel #4
0
def shifts_montgomery(data,
                      L_plus,
                      L_minus=None,
                      delta=1.5,
                      k=None,
                      nmc=4000,
                      n=2000,
                      two_sided=True,
                      block_length=None,
                      missing_values='omit',
                      gap=0,
                      BB_method='MBB'):
    """
    Estimate shift sizes with Montgomery's CUSUM formula.

    For each Monte-Carlo run, a series of length 'n' is resampled from the
    out-of-control (OC) data with a block bootstrap procedure and monitored
    by a CUSUM chart. At the first alert, the size of the shift is estimated
    with the classical formula (Montgomery, Introduction to statistical
    quality control, 2004):

        upper chart:  k + C_plus / N_plus
        lower chart: -k + C_minus / N_minus

    where N is the number of observations since the chart statistic last
    equalled zero, and C_minus is non-positive in this implementation.
    Runs without alert do not contribute to the output.

    Parameters
    ---------
    data : 2D-array
        OC dataset (rows: time, columns: OC series). It is never modified:
        when missing_values is 'fill', a filled copy is used internally.
    L_plus : float
        Value for the positive control limit.
    L_minus : float, optional
        Value for the negative control limit. Default is None.
        When None, L_minus = - L_plus.
    delta : float, optional
        The target shift size. Default is 1.5.
    k : float, optional
        The allowance parameter.  The default is None.
        When None, k = |delta|/2 (optimal formula for iid normal data).
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000.
    n : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 2000.
    two_sided : bool, optional
        Flag to use two-sided CUSUM chart. Otherwise, the one-sided
        upper CUSUM chart is used. Default is True.
    block_length :  int > 0, optional
        The length of the blocks. Default is None.
        When None, the length is computed using an optimal formula.
    missing_values : str, optional
        String that indicates how to deal with the missing values (MV).
        The string value should be chosen among: 'omit', 'reset' and 'fill':
        'omit' removes the blocks containing MV ;
        'fill' fills-up the MV by the mean of each series ;
        'reset' resets the chart statistics at zero for gaps larger than
        a specified gap length (argument 'gap').
        The chart statistics is simply propagated through smaller gaps.
        Default is 'omit'.
    gap :  int >= 0, optional
        The length of the gaps above which the chart statistics are reset,
        expressed in number of obs. Default is zero.
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling data.
       Values for the string should be selected among:
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.

    Returns
    --------
    shifts : 1D-array
        The estimated shift sizes: the upper-chart estimates first, followed
        (when two_sided is True) by the lower-chart estimates.

    """
    assert np.ndim(data) == 2, "Input data must be a 2D array"
    assert missing_values in ['fill', 'reset',
                              'omit'], "Undefined value for 'missing_values'"
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    n = int(n)
    assert n > 0, "n must be strictly positive"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"

    if k is None:
        k = abs(delta) / 2
    if L_minus is None:
        L_minus = -L_plus

    if missing_values == 'fill':
        # Work on a copy so that the caller's array is left untouched.
        data = np.array(data)
        for col in range(data.shape[1]):
            missing = np.isnan(data[:, col])
            data[missing, col] = np.nanmean(data[:, col])

    ## Block bootstrap
    # The matched block bootstrap resamples directly from the data inside
    # the Monte-Carlo loop; the other methods draw from pre-computed blocks.
    if BB_method != 'MABB':
        make_blocks = getattr(bb, BB_method)  # bb.MBB, bb.NBB or bb.CBB
        if missing_values == 'reset':
            blocks = make_blocks(data, block_length, NaN=True)
        else:
            blocks = make_blocks(data, block_length)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    def upper_shift_estimate(series, limit):
        """Monitor 'series' with the upper CUSUM chart and return
        k + C/N at the first alert (NaN when the chart never alerts)."""
        stat = np.zeros(n)
        last_zero = 0  # latest index (before the current one) where stat == 0
        nan_run = 0  # missing obs. through which the statistic was propagated
        for t in range(1, n):
            if not np.isnan(series[t]):
                stat[t] = max(0, stat[t - 1] + series[t] - k)
                nan_run = 0
            elif nan_run < gap:
                stat[t] = stat[t - 1]  # short gap: propagate the statistic
                nan_run += 1
            else:
                stat[t] = 0  # long gap: reset the chart
            if stat[t] > limit:
                return k + stat[t] / (t - last_zero)
            if stat[t] == 0:
                last_zero = t
        return np.nan

    shift_hat_plus = np.full((nmc, 1), np.nan)
    shift_hat_minus = np.full((nmc, 1), np.nan)
    for b in range(nmc):

        if BB_method == 'MABB':
            boot = bb.resample_MatchedBB(data, block_length, n=n)
        else:
            boot = resample(blocks, replace=True,
                            n_samples=n_blocks).flatten()[:n]

        shift_hat_plus[b] = upper_shift_estimate(boot, L_plus)

        if two_sided:
            # The lower chart on x is the mirror image of the upper chart
            # on -x (C_minus = -C). Negating the mirrored estimate yields
            # -k + C_minus/N, i.e. a negative size for a downward shift.
            shift_hat_minus[b] = -upper_shift_estimate(
                np.negative(boot), -L_minus)

    shifts = shift_hat_plus[~np.isnan(shift_hat_plus)]
    if two_sided:
        shifts = np.concatenate(
            (shifts, shift_hat_minus[~np.isnan(shift_hat_minus)]))

    return shifts
Beispiel #5
0
def ARL_values(data,
               L_plus,
               L_minus=None,
               form='jump',
               delta=1.5,
               k=None,
               nmc=4000,
               n=8000,
               two_sided=True,
               missing_values='omit',
               gap=0,
               block_length=None,
               BB_method='MBB'):
    """
    Compute the in-control (IC) and out-of-control (OC) average run lengths
    (ARL0 and ARL1) of the CUSUM chart.

    The algorithm works as follows.
    For each Monte-Carlo run, a new series of observations is sampled from the
    IC data using a block bootstrap procedure. A shift of specified form and
    size is then added to the second half of the series (from index n/2).
    The first half yields the IC run length (time of the first false alert)
    and the second half, on which the chart is restarted from zero, yields
    the OC run length (delay until the first alert after the shift).
    When no alert occurs, the run length is set to the length of the
    corresponding half. Finally, the OC and IC average run lengths are
    calculated over the runs.

    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series). It is never modified:
        when missing_values is 'fill', a filled copy is used internally.
    L_plus : float
        Value for the positive control limit.
    L_minus : float, optional
        Value for the negative control limit. Default is None.
        When None, L_minus = - L_plus.
    form :  str, optional
         String that represents the form of the shifts that are simulated.
         The value of the string should be chosen among: 'jump', 'oscillation'
         or 'drift'.
         Default is 'jump'.
    delta : float, optional
        The target shift size. Default is 1.5.
    k : float, optional
        The allowance parameter. The default is None.
        When None, k = |delta|/2 (optimal formula for normal data).
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000.
    n : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 8000.
    two_sided : bool, optional
        Flag to use two-sided CUSUM chart. Otherwise, the one-sided
        upper CUSUM chart is used. Default is True.
    missing_values : str, optional
        String that indicates how to deal with the missing values (MV).
        The string value should be chosen among: 'omit', 'reset' and 'fill':
        'omit' removes the blocks containing MV ;
        'fill' fills-up the MV by the mean of each series ;
        'reset' resets the chart statistics at zero for gaps larger than
        a specified gap length (argument 'gap').
        The chart statistics is simply propagated through smaller gaps.
        Default is 'omit'.
    gap :  int >= 0, optional
        The length of the gaps above which the chart statistics are reset,
        expressed in number of obs. Default is zero.
    block_length :  int > 0, optional
        The length of the blocks. Default is None.
        When None, the length is computed using an optimal formula.
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling data.
       Values for the string should be selected among:
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.

    Returns
    --------
    ARL1, ARL0: float
       The OC and IC average run lengths (ARL1 and ARL0) of the chart.

    """
    assert np.ndim(data) == 2, "Input data must be a 2D array"
    assert missing_values in ['fill', 'reset',
                              'omit'], "Undefined value for 'missing_values'"
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    assert form in ['jump', 'drift', 'oscillation'], "Undefined shift form"
    n = int(n)
    assert n > 0, "n must be strictly positive"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"

    # chart parameters
    if k is None:
        k = abs(delta) / 2
    if L_minus is None:
        L_minus = -L_plus
    n_shift = int(n / 2)  # index at which the shift starts
    n_after = n - n_shift  # number of shifted observations

    if missing_values == 'fill':
        # Work on a copy so that the caller's array is left untouched.
        data = np.array(data)
        for col in range(data.shape[1]):
            missing = np.isnan(data[:, col])
            data[missing, col] = np.nanmean(data[:, col])

    ## Block bootstrap
    # The matched block bootstrap resamples directly from the data inside
    # the Monte-Carlo loop; the other methods draw from pre-computed blocks.
    if BB_method != 'MABB':
        make_blocks = getattr(bb, BB_method)  # bb.MBB, bb.NBB or bb.CBB
        if missing_values == 'reset':
            blocks = make_blocks(data, block_length, NaN=True)
        else:
            blocks = make_blocks(data, block_length)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    def upper_run_lengths(series, limit):
        """Monitor 'series' with the upper CUSUM chart.

        Returns (false_alarm, delay): the index of the first alert at or
        before n_shift and the delay of the first alert after n_shift.
        Each entry is None when the corresponding alert never happens.
        """
        stat = np.zeros(n)
        nan_run = 0  # missing obs. through which the statistic was propagated
        false_alarm = None
        for t in range(1, n):
            if not np.isnan(series[t]):
                stat[t] = max(0, stat[t - 1] + series[t] - k)
                nan_run = 0
            elif nan_run < gap:
                stat[t] = stat[t - 1]  # short gap: propagate the statistic
                nan_run += 1
            else:
                stat[t] = 0  # long gap: reset the chart
            if t == n_shift:
                # Restart the chart when the shift begins so that the OC
                # run length does not depend on the IC history.
                stat[t] = 0
            if stat[t] > limit:
                if t > n_shift:
                    return false_alarm, t - n_shift
                if false_alarm is None:
                    false_alarm = t
        return false_alarm, None

    # Runs without alert keep these censored default values.
    FP_plus = np.full((nmc, 1), float(n_shift))
    FP_minus = np.full((nmc, 1), float(n_shift))
    RL1_plus = np.full((nmc, 1), float(n_after))
    RL1_minus = np.full((nmc, 1), float(n_after))
    for b in range(nmc):

        if BB_method == 'MABB':
            boot = bb.resample_MatchedBB(data, block_length, n=n)
        else:
            boot = resample(blocks, replace=True,
                            n_samples=n_blocks).flatten()[:n]

        # Simulate the shift on the second part of the series. The time
        # index has n_after entries so that it also matches odd n.
        if form == 'oscillation':
            eta = np.random.uniform(0.02, 0.2)
            boot[n_shift:] = np.sin(
                eta * np.pi * np.arange(n_after)) * delta + boot[n_shift:]
        elif form == 'drift':
            power = np.random.uniform(1.5, 2)
            boot[n_shift:] = delta / (500) * (np.arange(n_after)**
                                              power) + boot[n_shift:]
        else:
            boot[n_shift:] = boot[n_shift:] + delta

        false_alarm, delay = upper_run_lengths(boot, L_plus)
        if false_alarm is not None:
            FP_plus[b] = false_alarm
        if delay is not None:
            RL1_plus[b] = delay

        if two_sided:
            # The lower chart on x is the mirror image of the upper chart
            # on -x, with control limit -L_minus.
            false_alarm, delay = upper_run_lengths(np.negative(boot),
                                                   -L_minus)
            if false_alarm is not None:
                FP_minus[b] = false_alarm
            if delay is not None:
                RL1_minus[b] = delay

    if two_sided:
        ARL1 = (1 / (np.mean(RL1_minus)) + 1 / (np.mean(RL1_plus)))**(-1)
        ARL0 = (1 / (np.mean(FP_minus)) + 1 / (np.mean(FP_plus)))**(-1)
    else:
        ARL1 = np.mean(RL1_plus)
        ARL0 = np.mean(FP_plus)

    return (ARL1, ARL0)
Beispiel #6
0
def ARL0_CUSUM(data,
               L_plus,
               L_minus=None,
               delta=1.5,
               k=None,
               nmc=4000,
               n=4000,
               two_sided=True,
               missing_values='omit',
               gap=0,
               block_length=None,
               BB_method='MBB'):
    """
    Compute the in-control (IC) average run length (ARL0) of the CUSUM
    chart in presence of missing values.

    The algorithm works as follows.
    For each Monte-Carlo run, a new series of observations is sampled from the
    IC data using a block bootstrap procedure. Then, the run length of the
    chart (index of the first alert, or 'n' when the chart never alerts)
    is evaluated. Finally, the average run length is calculated over the runs.

    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series). It is never modified:
        when missing_values is 'fill', a filled copy is used internally.
    L_plus : float
        Value for the positive control limit.
    L_minus : float, optional
        Value for the negative control limit. Default is None.
        When None, L_minus = - L_plus.
    delta : float, optional
        The target shift size. Default is 1.5.
    k : float, optional
        The allowance parameter. The default is None.
        When None, k = |delta|/2 (optimal formula for iid normal data).
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000.
    n : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 4000.
    two_sided : bool, optional
        Flag to use two-sided CUSUM chart. Otherwise, the one-sided
        upper CUSUM chart is used. Default is True.
    missing_values : str, optional
        String that indicates how to deal with the missing values (MV).
        The string value should be chosen among: 'omit', 'reset' and 'fill':
        'omit' removes the blocks containing MV ;
        'fill' fills-up the MV by the mean of each series ;
        'reset' resets the chart statistics at zero for gaps larger than
        a specified gap length (argument 'gap').
        The chart statistics is simply propagated through smaller gaps.
        Default is 'omit'.
    gap :  int >= 0, optional
        The length of the gaps above which the chart statistics are reset,
        expressed in number of obs. Default is zero.
    block_length :  int > 0, optional
        The length of the blocks. Default is None.
        When None, the length is computed using an optimal formula.
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling data.
       Values for the string should be selected among:
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.

    Returns
    -------
    ARL :  float
        The IC average run length (ARL0).

    """
    assert np.ndim(data) == 2, "Input data must be a 2D array"
    assert missing_values in ['fill', 'reset',
                              'omit'], "Undefined value for 'missing_values'"
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    n = int(n)
    assert n > 0, "n must be strictly positive"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"

    # chart parameters
    if k is None:
        k = abs(delta) / 2
    if L_minus is None:
        L_minus = -L_plus

    if missing_values == 'fill':
        # Work on a copy so that the caller's array is left untouched.
        data = np.array(data)
        for col in range(data.shape[1]):
            missing = np.isnan(data[:, col])
            data[missing, col] = np.nanmean(data[:, col])

    ## Block bootstrap
    # The matched block bootstrap resamples directly from the data inside
    # the Monte-Carlo loop; the other methods draw from pre-computed blocks.
    if BB_method != 'MABB':
        make_blocks = getattr(bb, BB_method)  # bb.MBB, bb.NBB or bb.CBB
        if missing_values == 'reset':
            blocks = make_blocks(data, block_length, NaN=True)
        else:
            blocks = make_blocks(data, block_length)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    def upper_run_length(series, limit):
        """Monitor 'series' with the upper CUSUM chart and return the index
        of the first alert, or n when the chart never alerts."""
        stat = np.zeros(n)
        nan_run = 0  # missing obs. through which the statistic was propagated
        for t in range(1, n):
            if not np.isnan(series[t]):
                stat[t] = max(0, stat[t - 1] + series[t] - k)
                nan_run = 0
            elif nan_run < gap:
                stat[t] = stat[t - 1]  # short gap: propagate the statistic
                nan_run += 1
            else:
                stat[t] = 0  # long gap: reset the chart
            if stat[t] > limit:
                return t
        return n

    RL_plus = np.zeros((nmc, 1))
    RL_minus = np.zeros((nmc, 1))
    for j in range(nmc):

        if BB_method == 'MABB':
            boot = bb.resample_MatchedBB(data, block_length, n=n)
        else:
            boot = resample(blocks, replace=True,
                            n_samples=n_blocks).flatten()[:n]

        ### Monitoring ###
        RL_plus[j] = upper_run_length(boot, L_plus)
        if two_sided:
            # The lower chart on x is the mirror image of the upper chart
            # on -x, with control limit -L_minus.
            RL_minus[j] = upper_run_length(np.negative(boot), -L_minus)

    if two_sided:
        ARL = (1 / (np.mean(RL_minus)) + 1 / (np.mean(RL_plus)))**(-1)
    else:
        ARL = np.mean(RL_plus)
    return ARL
Beispiel #7
0
def limit_CUSUM(data,
                delta=1.5,
                k=None,
                ARL0_threshold=200,
                rho=2,
                L_plus=20,
                L_minus=0,
                nmc=4000,
                n=4000,
                two_sided=True,
                verbose=True,
                missing_values='omit',
                gap=0,
                block_length=None,
                BB_method='MBB'):
    """
    Compute the control limit of the CUSUM chart in presence of missing values.

    The control limit of the chart is adjusted by a bisection search.
    Starting from the middle of the interval [L_minus, L_plus], the actual IC
    average run length (ARL0) is computed on 'nmc' processes that are sampled
    with repetition from the IC data by the block bootstrap procedure.
    If the actual ARL0 is inferior (resp. superior) to the pre-specified ARL0,
    the control limit of the chart is increased (resp. decreased).
    This algorithm is iterated until the actual ARL0 reaches the pre-specified
    ARL0 at the desired accuracy. The limit that is returned is the one at
    which this ARL0 was actually evaluated.

    If the pre-specified ARL0 cannot be reached inside [L_minus, L_plus],
    the search stops once the interval can no longer be refined: a warning
    is issued and the last evaluated limit is returned.

    Parameters
    ---------
    data : 2D-array
        IC dataset (rows: time, columns: IC series). It is never modified:
        when missing_values is 'fill', a filled copy is used internally.
    delta : float, optional
        The target shift size. Default is 1.5.
    k : float, optional
        The allowance parameter.  The default is None.
        When None, k = |delta|/2 (optimal formula for iid normal data).
    ARL0_threshold : int > 0, optional
        Pre-specified value for the IC average run length (ARL0).
        This value is inversely proportional to the rate of false positives.
        Typical values are 100, 200 or 500. Default is 200.
    rho : float > 0, optional
        Accuracy to reach the pre-specified value for ARL0:
        the algorithm stops when |ARL0-ARL0_threshold| <= rho.
        The default is 2.
    L_plus : float, optional
        Upper bound of the search interval for the positive control limit.
        Default is 20.
    L_minus : float, optional
        Lower bound of the search interval for the positive control limit.
        Default is 0.
    nmc : int > 0, optional
        Number of Monte-Carlo runs. This parameter has typically a large value.
        Default is 4000.
    n : int > 0, optional
        Length of the resampled series (by the block bootstrap procedure).
        Default is 4000.
    two_sided : bool, optional
        Flag to use two-sided CUSUM chart. Otherwise, the one-sided
        upper CUSUM chart is used. Default is True.
    verbose : bool, optional
        Flag to print, at each iteration, the ARL0 followed by the control
        limit at which it was evaluated. Default is True.
    missing_values : str, optional
        String that indicates how to deal with the missing values (MV).
        The string value should be chosen among: 'omit', 'reset' and 'fill':
        'omit' removes the blocks containing MV ;
        'fill' fills-up the MV by the mean of each series ;
        'reset' resets the chart statistics at zero for gaps larger than
        a specified gap length (argument 'gap').
        The chart statistics is simply propagated through smaller gaps.
        Default is 'omit'.
    gap :  int >= 0, optional
        The length of the gaps above which the chart statistics are reset,
        expressed in number of obs. Default is zero.
    block_length :  int > 0, optional
        The length of the blocks. Default is None.
        When None, the length is computed using an optimal formula.
    BB_method : str, optional
       String that designates the block bootstrap method chosen for sampling data.
       Values for the string should be selected among:
       'MBB': moving block bootstrap
       'NBB': non-overlapping block bootstrap
       'CBB': circular block bootstrap
       'MABB': matched block bootstrap
       Default is 'MBB'.

    Returns
    ------
    L : float
       The positive control limit of the chart (with this algorithm,
       it has the same value as the negative control limit, with opposite sign).

    """
    import warnings

    assert np.ndim(data) == 2, "Input data must be a 2D array"
    assert missing_values in ['fill', 'reset',
                              'omit'], "Undefined value for 'missing_values'"
    assert BB_method in ['MBB', 'NBB', 'CBB',
                         'MABB'], "Undefined block bootstrap procedure"
    assert L_plus > L_minus, "L_plus should be superior than L_minus"
    n = int(n)
    assert n > 0, "n must be strictly positive"
    nmc = int(nmc)
    assert nmc > 0, "nmc must be strictly positive"
    assert rho > 0, "rho must be strictly positive"
    assert ARL0_threshold > 0, "ARL0_threshold must be strictly positive"

    # chart parameters
    if k is None:
        k = abs(delta) / 2

    if missing_values == 'fill':
        # Work on a copy so that the caller's array is left untouched.
        data = np.array(data)
        for col in range(data.shape[1]):
            missing = np.isnan(data[:, col])
            data[missing, col] = np.nanmean(data[:, col])

    ## Block bootstrap
    # The matched block bootstrap resamples directly from the data inside
    # the Monte-Carlo loop; the other methods draw from pre-computed blocks.
    if BB_method != 'MABB':
        make_blocks = getattr(bb, BB_method)  # bb.MBB, bb.NBB or bb.CBB
        if missing_values == 'reset':
            blocks = make_blocks(data, block_length, NaN=True)
        else:
            blocks = make_blocks(data, block_length)
        n_blocks = int(np.ceil(n / blocks.shape[1]))

    def upper_run_length(series, limit):
        """Monitor 'series' with the upper CUSUM chart and return the index
        of the first alert, or n when the chart never alerts."""
        stat = np.zeros(n)
        nan_run = 0  # missing obs. through which the statistic was propagated
        for t in range(1, n):
            if not np.isnan(series[t]):
                stat[t] = max(0, stat[t - 1] + series[t] - k)
                nan_run = 0
            elif nan_run < gap:
                stat[t] = stat[t - 1]  # short gap: propagate the statistic
                nan_run += 1
            else:
                stat[t] = 0  # long gap: reset the chart
            if stat[t] > limit:
                return t
        return n

    L = (L_plus + L_minus) / 2
    while True:
        RL_plus = np.zeros((nmc, 1))
        RL_minus = np.zeros((nmc, 1))
        for j in range(nmc):

            if BB_method == 'MABB':
                boot = bb.resample_MatchedBB(data, block_length, n=n)
            else:
                boot = resample(blocks, replace=True,
                                n_samples=n_blocks).flatten()[:n]

            ### Monitoring ###
            RL_plus[j] = upper_run_length(boot, L)
            if two_sided:
                # The lower chart on x with limit -L is the mirror image
                # of the upper chart on -x with limit L.
                RL_minus[j] = upper_run_length(np.negative(boot), L)

        if two_sided:
            ARL = (1 / (np.mean(RL_minus)) + 1 / (np.mean(RL_plus)))**(-1)
        else:
            ARL = np.mean(RL_plus)

        if verbose:
            print(ARL)
            print(L)

        # Stop on the limit that was actually evaluated, so that the value
        # returned is the one whose ARL0 meets the requested accuracy.
        if np.abs(ARL - ARL0_threshold) <= rho:
            break

        # Bisection step: a too small ARL0 calls for a larger limit.
        if ARL < ARL0_threshold:
            L_minus = L
        else:
            L_plus = L
        next_L = (L_plus + L_minus) / 2
        if next_L == L_minus or next_L == L_plus:
            # The interval cannot be halved any further in floating point:
            # the target ARL0 is not reachable at the requested accuracy.
            warnings.warn("limit_CUSUM: search interval exhausted before "
                          "reaching the requested accuracy on ARL0")
            break
        L = next_L

    return L