Example #1
def test_Fit_Lognormal_2P():
    # assumes the usual test-module imports: Lognormal_Distribution, Fit_Lognormal_2P,
    # make_right_censored_data and numpy.testing.assert_allclose, plus the module-level
    # rtol/atol tolerances defined elsewhere in the test file
    dist = Lognormal_Distribution(mu=1, sigma=0.5)
    rawdata = dist.random_samples(20, seed=5)
    data = make_right_censored_data(data=rawdata, threshold=dist.mean)
    fit = Fit_Lognormal_2P(failures=data.failures, right_censored=data.right_censored, show_probability_plot=False, print_results=False)
    assert_allclose(fit.mu, 0.9494189618970151, rtol=rtol, atol=atol)
    assert_allclose(fit.sigma, 0.4267323807168996, rtol=rtol, atol=atol)
    assert_allclose(fit.gamma, 0, rtol=rtol, atol=atol)
    assert_allclose(fit.AICc, 49.69392320890684, rtol=rtol, atol=atol)
    assert_allclose(fit.Cov_mu_sigma, 0.0025054526707355687, rtol=rtol, atol=atol)
    assert_allclose(fit.loglik, -22.494020427982832, rtol=rtol, atol=atol)
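The assertions above only check the fitted parameters against stored reference values. To work with the fitted model itself, a minimal sketch (assuming the reliability API used in these examples, where the estimated mu and sigma can be passed back into Lognormal_Distribution; the query point t = 2.0 is arbitrary) is:

from reliability.Distributions import Lognormal_Distribution
from reliability.Fitters import Fit_Lognormal_2P
from reliability.Other_functions import make_right_censored_data

# recreate the censored dataset used in the test
dist = Lognormal_Distribution(mu=1, sigma=0.5)
rawdata = dist.random_samples(20, seed=5)
data = make_right_censored_data(data=rawdata, threshold=dist.mean)

fit = Fit_Lognormal_2P(failures=data.failures,
                       right_censored=data.right_censored,
                       show_probability_plot=False,
                       print_results=False)

# rebuild the fitted distribution from the estimated parameters and query it
fitted = Lognormal_Distribution(mu=fit.mu, sigma=fit.sigma)
print(fitted.mean)                       # mean of the fitted distribution
print(fitted.SF(2.0, show_plot=False))   # survival probability at t = 2.0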
Example #2
def test_Fit_Lognormal_2P():
    dist = Lognormal_Distribution(mu=1, sigma=0.5)
    rawdata = dist.random_samples(20, seed=5)
    data = make_right_censored_data(data=rawdata, threshold=dist.mean)

    MLE = Fit_Lognormal_2P(failures=data.failures,
                           right_censored=data.right_censored,
                           method='MLE',
                           show_probability_plot=False,
                           print_results=False)
    assert_allclose(MLE.mu, 0.9494190246173423, rtol=rtol, atol=atol)
    assert_allclose(MLE.sigma, 0.4267323457212804, rtol=rtol, atol=atol)
    assert_allclose(MLE.gamma, 0, rtol=rtol, atol=atol)
    assert_allclose(MLE.AICc, 49.69392320890687, rtol=rtol, atol=atol)
    assert_allclose(MLE.BIC, 50.979505403073674, rtol=rtol, atol=atol)
    assert_allclose(MLE.loglik, -22.494020427982846, rtol=rtol, atol=atol)
    assert_allclose(MLE.AD, 46.91678130009629, rtol=rtol, atol=atol)
    assert_allclose(MLE.Cov_mu_sigma,
                    0.002505454567167978,
                    rtol=rtol,
                    atol=atol)

    LS = Fit_Lognormal_2P(failures=data.failures,
                          right_censored=data.right_censored,
                          method='LS',
                          show_probability_plot=False,
                          print_results=False)
    assert_allclose(LS.mu, 0.9427890879489974, rtol=rtol, atol=atol)
    assert_allclose(LS.sigma, 0.4475312141445822, rtol=rtol, atol=atol)
    assert_allclose(LS.gamma, 0, rtol=rtol, atol=atol)
    assert_allclose(LS.AICc, 49.757609068995194, rtol=rtol, atol=atol)
    assert_allclose(LS.BIC, 51.043191263162, rtol=rtol, atol=atol)
    assert_allclose(LS.loglik, -22.52586335802701, rtol=rtol, atol=atol)
    assert_allclose(LS.AD, 46.93509652892565, rtol=rtol, atol=atol)
    assert_allclose(LS.Cov_mu_sigma,
                    0.0025640250120794526,
                    rtol=rtol,
                    atol=atol)
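Example #2 fits the same censored sample twice, once by maximum likelihood (method='MLE') and once by least squares (method='LS'). Outside a test you would typically keep whichever fit has the better information criterion; a minimal sketch of that selection, reusing the MLE and LS objects from the example above, is:

# prefer the fit with the lower (better) small-sample corrected AIC
best = MLE if MLE.AICc <= LS.AICc else LS
print('MLE AICc:', MLE.AICc, '  LS AICc:', LS.AICc)
print('selected: mu =', best.mu, ', sigma =', best.sigma)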
def HistogramPLOT_all(data, month, year):
    #Initiate
    Situation = []
    mon = [
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December'
    ]
    # Keep only the full-day records
    logicF = data["isFULL"] == 1
    data01 = data[logicF].copy()
    data01 = data01.ffill()  # forward-fill any missing values

    logicY = (data01["DateTime"].apply(lambda x: x.year) == (year))
    data01 = data01[logicY].copy()

    fig = plt.figure(figsize=(24, 18), dpi=80, facecolor='w', edgecolor='r')
    #Plotting 12 graph
    xvals = np.linspace(0, 30, 1000)
    for i in range(month):
        ax = plt.subplot2grid((4, 3), (int(np.floor(i / 3)), int(i % 3)))
        logic = data01["DateTime"].apply(lambda x: x.month) == (i + 1)
        ws = data01['WS95'][logic]
        ws = ws + 0.0001  # shift away from zero since the fitted distributions require values > 0
        failures = []
        censored = []
        threshold = 30
        # wind speeds above the threshold are treated as right-censored at the threshold value
        for item in ws:
            if item > threshold:
                censored.append(threshold)
            else:
                failures.append(item)
        print(ws.shape)  # number of wind speed observations in this month
        if np.sum(logic) != 0:
            ax.hist(ws, bins=30, density=True)  # normalised so the histogram integrates to 1
            hist, edge = np.histogram(np.array(ws),
                                      bins=1000,
                                      range=(0, 30),
                                      density=True)
            wb2 = Fit_Weibull_2P(failures=failures,
                                 show_probability_plot=False,
                                 print_results=False)
            wb3 = Fit_Weibull_3P(failures=failures,
                                 show_probability_plot=False,
                                 print_results=False)
            gm2 = Fit_Gamma_2P(failures=failures,
                               show_probability_plot=False,
                               print_results=False)
            gm3 = Fit_Gamma_3P(failures=failures,
                               show_probability_plot=False,
                               print_results=False)
            ln2 = Fit_Lognormal_2P(failures=failures,
                                   show_probability_plot=False,
                                   print_results=False)
            wbm = Fit_Weibull_Mixture(failures=failures,
                                      right_censored=censored,
                                      show_plot=False,
                                      print_results=False)

            wb2_pdf = Weibull_Distribution(alpha=wb2.alpha, beta=wb2.beta).PDF(
                xvals=xvals, show_plot=True, label='Weibull_2P')
            wb3_pdf = Weibull_Distribution(alpha=wb3.alpha,
                                           beta=wb3.beta,
                                           gamma=wb3.gamma).PDF(
                                               xvals=xvals,
                                               show_plot=True,
                                               label='Weibull_3P')
            gm2_pdf = Gamma_Distribution(alpha=gm2.alpha,
                                         beta=gm2.beta).PDF(xvals=xvals,
                                                            show_plot=True,
                                                            label='Gamma_2P')
            gm3_pdf = Gamma_Distribution(alpha=gm3.alpha,
                                         beta=gm3.beta,
                                         gamma=gm3.gamma).PDF(xvals=xvals,
                                                              show_plot=True,
                                                              label='Gamma_3P')
            ln2_pdf = Lognormal_Distribution(mu=ln2.mu, sigma=ln2.sigma).PDF(
                xvals=xvals, show_plot=True, label='Lognormal_2P')

            part1_pdf = Weibull_Distribution(alpha=wbm.alpha_1,
                                             beta=wbm.beta_1).PDF(
                                                 xvals=xvals, show_plot=False)
            part2_pdf = Weibull_Distribution(alpha=wbm.alpha_2,
                                             beta=wbm.beta_2).PDF(
                                                 xvals=xvals, show_plot=False)
            Mixture_PDF = part1_pdf * wbm.proportion_1 + part2_pdf * wbm.proportion_2
            ax.plot(xvals, Mixture_PDF, label='Weibull_Mixture')
        ax.legend()
        ax.set_ylim(0, 0.16)
        ax.set_xlim(0, 30)
        ax.set_xticks([0, 5, 10, 15, 20, 25, 30])
        ax.tick_params(axis="x", labelsize=20)
        ax.tick_params(axis="y", labelsize=20)
        ax.set_title('{}'.format(mon[i]), fontweight='bold', size=20)
    plt.tight_layout()
    plt.show()
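HistogramPLOT_all expects a pandas DataFrame with 'DateTime', 'isFULL' and 'WS95' columns and draws one fitted-distribution panel per month. A minimal, hypothetical call (the file name and column layout are assumptions based on what the function reads, and the numpy/matplotlib/reliability imports the function body relies on are assumed to be in scope) is:

import pandas as pd

# 'DateTime' must parse as datetimes; 'isFULL' flags complete days; 'WS95' holds the wind speeds
data = pd.read_csv('wind_speed_data.csv', parse_dates=['DateTime'])
HistogramPLOT_all(data, month=12, year=2018)  # twelve monthly panels for 2018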
    def __init__(self, failures, failure_stress, right_censored=None, right_censored_stress=None, print_results=True, show_plot=True, common_shape_method='BIC'):

        # input type checking and converting to arrays in preparation for creation of the dataframe
        if common_shape_method not in ['BIC', 'weighted_average', 'average']:
            raise ValueError('common_shape_method must be either BIC, weighted_average, or average. Default is BIC.')
        if len(failures) != len(failure_stress):
            raise ValueError('The length of failures does not match the length of failure_stress')
        if type(failures) is list:
            failures = np.array(failures)
        elif type(failures) is np.ndarray:
            pass
        else:
            raise ValueError('failures must be an array or list')
        if type(failure_stress) is list:
            failure_stress = np.array(failure_stress)
        elif type(failure_stress) is np.ndarray:
            pass
        else:
            raise ValueError('failure_stress must be an array or list')
        if right_censored is not None:
            if len(right_censored) != len(right_censored_stress):
                raise ValueError('The length of right_censored does not match the length of right_censored_stress')
            if type(right_censored) is list:
                right_censored = np.array(right_censored)
            elif type(right_censored) is np.ndarray:
                pass
            else:
                raise ValueError('right_censored must be an array or list')
            if type(right_censored_stress) is list:
                right_censored_stress = np.array(right_censored_stress)
            elif type(right_censored_stress) is np.ndarray:
                pass
            else:
                raise ValueError('right_censored_stress must be an array or list')

        xmin = np.floor(np.log10(min(failures))) - 1
        xmax = np.ceil(np.log10(max(failures))) + 1
        xvals = np.logspace(xmin, xmax, 100)

        if right_censored is not None:
            TIMES = np.hstack([failures, right_censored])
            STRESS = np.hstack([failure_stress, right_censored_stress])
            CENS_CODES = np.hstack([np.ones_like(failures), np.zeros_like(right_censored)])
        else:
            TIMES = failures
            STRESS = failure_stress
            CENS_CODES = np.ones_like(failures)

        data = {'times': TIMES, 'stress': STRESS, 'cens_codes': CENS_CODES}
        df = pd.DataFrame(data, columns=['times', 'stress', 'cens_codes'])
        df_sorted = df.sort_values(by=['cens_codes', 'stress', 'times'])
        is_failure = df_sorted['cens_codes'] == 1
        is_right_cens = df_sorted['cens_codes'] == 0
        f_df = df_sorted[is_failure]
        rc_df = df_sorted[is_right_cens]
        unique_stresses_f = f_df.stress.unique()
        if right_censored is not None:
            unique_stresses_rc = rc_df.stress.unique()
            for item in unique_stresses_rc:  # check that there are no unique right_censored stresses that are not also in failure stresses
                if item not in unique_stresses_f:
                    raise ValueError('The right_censored_stress array contains values that are not in the failure_stress array. This is equivalent to trying to fit a distribution to only censored data and cannot be done.')

        lognormal_fit_mu_array = []
        lognormal_fit_sigma_array = []
        lognormal_fit_mu_array_common_shape = []
        color_list = ['steelblue', 'darkorange', 'red', 'green', 'purple', 'blue', 'grey', 'deeppink', 'cyan', 'chocolate']
        weights_array = []
        # within this loop, each list of failures and right censored values will be unpacked for each unique stress to find the common sigma parameter
        for stress in unique_stresses_f:
            failure_current_stress_df = f_df[f_df['stress'] == stress]
            FAILURES = failure_current_stress_df['times'].values
            len_f = len(FAILURES)
            if right_censored is not None:
                if stress in unique_stresses_rc:
                    right_cens_current_stress_df = rc_df[rc_df['stress'] == stress]
                    RIGHT_CENSORED = right_cens_current_stress_df['times'].values
                    len_rc = len(RIGHT_CENSORED)
                else:
                    RIGHT_CENSORED = None
                    len_rc = 0
            else:
                RIGHT_CENSORED = None
                len_rc = 0

            weights_array.append(len_f + len_rc)
            lognormal_fit = Fit_Lognormal_2P(failures=FAILURES, right_censored=RIGHT_CENSORED, show_probability_plot=False, print_results=False)
            lognormal_fit_mu_array.append(lognormal_fit.mu)
            lognormal_fit_sigma_array.append(lognormal_fit.sigma)
        common_shape_guess = np.average(lognormal_fit_sigma_array)

        def __BIC_minimizer(common_shape_X): #lgtm [py/similar-function]
            '''
            __BIC_minimizer is used by the minimize function to get the sigma which gives the lowest overall BIC
            '''
            BIC_tot = 0
            for stress in unique_stresses_f:
                failure_current_stress_df = f_df[f_df['stress'] == stress]
                FAILURES = failure_current_stress_df['times'].values
                if right_censored is not None:
                    if stress in unique_stresses_rc:
                        right_cens_current_stress_df = rc_df[rc_df['stress'] == stress]
                        RIGHT_CENSORED = right_cens_current_stress_df['times'].values
                    else:
                        RIGHT_CENSORED = None
                else:
                    RIGHT_CENSORED = None
                lognormal_fit_common_shape = Fit_Lognormal_2P(failures=FAILURES, right_censored=RIGHT_CENSORED, show_probability_plot=False, print_results=False, force_sigma=common_shape_X)
                BIC_tot += lognormal_fit_common_shape.BIC
            return BIC_tot

        if common_shape_method == 'BIC':
            optimized_sigma_results = minimize(__BIC_minimizer, x0=common_shape_guess, method='nelder-mead')
            common_shape = optimized_sigma_results.x[0]
        elif common_shape_method == 'weighted_average':
            total_data = sum(weights_array)
            weights = np.array(weights_array) / total_data
            common_shape = sum(weights * np.array(lognormal_fit_sigma_array))
        elif common_shape_method == 'average':
            common_shape = common_shape_guess  # this was just the numerical average obtained above
        self.common_shape = common_shape

        # within this loop, each list of failures and right censored values will be unpacked for each unique stress and plotted as a probability plot as well as the CDF of the common sigma plot
        AICc_total = 0
        BIC_total = 0
        AICc = True
        for i, stress in enumerate(unique_stresses_f):
            failure_current_stress_df = f_df[f_df['stress'] == stress]
            FAILURES = failure_current_stress_df['times'].values
            if right_censored is not None:
                if stress in unique_stresses_rc:
                    right_cens_current_stress_df = rc_df[rc_df['stress'] == stress]
                    RIGHT_CENSORED = right_cens_current_stress_df['times'].values
                else:
                    RIGHT_CENSORED = None
            else:
                RIGHT_CENSORED = None
            lognormal_fit_common_shape = Fit_Lognormal_2P(failures=FAILURES, right_censored=RIGHT_CENSORED, show_probability_plot=False, print_results=False, force_sigma=common_shape)
            lognormal_fit_mu_array_common_shape.append(lognormal_fit_common_shape.mu)
            if type(lognormal_fit_common_shape.AICc) == str:
                AICc = False
            else:
                AICc_total += lognormal_fit_common_shape.AICc
            BIC_total += lognormal_fit_common_shape.BIC
            if show_plot is True:
                lognormal_fit_common_shape.distribution.CDF(linestyle='--', color=color_list[i], xvals=xvals)
                Probability_plotting.Lognormal_probability_plot(failures=FAILURES, right_censored=RIGHT_CENSORED, color=color_list[i], label=str(stress))
                plt.legend(title='Stress')
                plt.xlim(10 ** (xmin + 1), 10 ** (xmax - 1))
                if common_shape_method == 'BIC':
                    plt.title(str('ALT Lognormal Probability Plot\nOptimal BIC ' + r'$\sigma$ = ' + str(round(common_shape, 4))))
                elif common_shape_method == 'weighted_average':
                    plt.title(str('ALT Lognormal Probability Plot\nWeighted average ' + r'$\sigma$ = ' + str(round(common_shape, 4))))
                elif common_shape_method == 'average':
                    plt.title(str('ALT Lognormal Probability Plot\nAverage ' + r'$\sigma$ = ' + str(round(common_shape, 4))))

        self.BIC_sum = np.sum(BIC_total)
        if AICc is True:
            self.AICc_sum = np.sum(AICc_total)
        else:
            self.AICc_sum = 'Insufficient Data'
        sigma_difs = (common_shape - np.array(lognormal_fit_sigma_array)) / np.array(lognormal_fit_sigma_array)
        sigma_differences = []
        for item in sigma_difs:
            if item > 0:
                sigma_differences.append(str('+' + str(round(item * 100, 2)) + '%'))
            else:
                sigma_differences.append(str(str(round(item * 100, 2)) + '%'))
        results = {'stress': unique_stresses_f, 'original mu': lognormal_fit_mu_array, 'original sigma': lognormal_fit_sigma_array, 'new mu': lognormal_fit_mu_array_common_shape, 'common sigma': np.ones_like(unique_stresses_f) * common_shape, 'sigma change': sigma_differences}
        results_df = pd.DataFrame(results, columns=['stress', 'original mu', 'original sigma', 'new mu', 'common sigma', 'sigma change'])
        blankIndex = [''] * len(results_df)
        results_df.index = blankIndex
        self.results = results_df
        if print_results is True:
            pd.set_option('display.width', 200)  # prevents wrapping after default 80 characters
            pd.set_option('display.max_columns', 9)  # shows the dataframe without ... truncation
            print('\nALT Lognormal probability plot results:')
            print(self.results)
            print('Total AICc:', self.AICc_sum)
            print('Total BIC:', self.BIC_sum)
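This __init__ belongs to an ALT lognormal probability-plot class whose name is not shown in the snippet; ALT_probability_plot_Lognormal from reliability.ALT_probability_plotting is assumed here, and the failure times below are made up purely for illustration. A minimal usage sketch:

import matplotlib.pyplot as plt
from reliability.ALT_probability_plotting import ALT_probability_plot_Lognormal  # assumed class name and location

# illustrative failure times recorded at three stress levels (made-up numbers)
failures = [250, 320, 410, 520, 95, 130, 180, 220, 40, 55, 75, 90]
failure_stress = [350, 350, 350, 350, 400, 400, 400, 400, 450, 450, 450, 450]

ALT_probability_plot_Lognormal(failures=failures,
                               failure_stress=failure_stress,
                               common_shape_method='BIC',
                               print_results=True,
                               show_plot=True)
plt.show()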