def log_rank_test(t1, d1, t2, d2):
    """
    Run a log-rank test on two groups of right-censored failure data.

    The null hypothesis under test is that both groups have the same
    reliability.

    :param t1: Survival times of the group 1 observations.
    :param d1: Indicator values for group 1; 1 marks an observed failure and
        0 marks a right-censored observation.
    :param t2: Survival times of the group 2 observations.
    :param d2: Indicator values for group 2; 1 marks an observed failure and
        0 marks a right-censored observation.
    :return: A tuple ``(table, log_rank_stat, var, p)`` holding a Pandas
        DataFrame of the intermediate calculations (one row per ordered
        failure time), the log-rank test statistic, the estimated variance
        of the statistic distribution and the P-value of the test.
    """
    # Make sure every input is a pd.Series.
    t1, d1, t2, d2 = (convert_to_pd_series(x) for x in (t1, d1, t2, d2))

    pooled_times = pd.concat([t1, t2])
    pooled_flags = pd.concat([d1, d2])

    # Distinct failure times over both groups, in ascending order.
    distinct_failures = pd.Series(pooled_times[pooled_flags == 1].unique())
    tf = distinct_failures.sort_values(ignore_index=True)

    def failures_at(times, flags):
        # Failures of one group observed exactly at each ordered failure time.
        return tf.apply(lambda x: sum(times[flags == 1] == x))

    def at_risk(times):
        # Observations of one group still at risk at each ordered failure time.
        return tf.apply(lambda x: sum(times >= x))

    m1 = failures_at(t1, d1)
    m2 = failures_at(t2, d2)
    n1 = at_risk(t1)
    n2 = at_risk(t2)

    # Expected failures per group if the null hypothesis holds.
    e1 = n1 / (n1 + n2) * (m1 + m2)
    e2 = n2 / (n1 + n2) * (m1 + m2)

    table = pd.DataFrame({
        'tf': tf,
        'm1f': m1,
        'm2f': m2,
        'n1f': n1,
        'n2f': n2,
        'e1f': e1,
        'e2f': e2
    })

    # Variance terms; a 0/0 term (NaN) is counted as zero.
    numerator = n1 * n2 * (m1 + m2) * (n1 + n2 - m1 - m2)
    denominator = (n1 + n2).pow(2) * (n1 + n2 - 1)
    var = sum((numerator / denominator).replace([np.nan], 0))

    # Squared (observed - expected) failures of group 1, scaled by variance.
    observed_minus_expected = sum(m1) - sum(e1)
    log_rank_stat = pow(observed_minus_expected, 2) / var

    # Upper tail of a chi-squared distribution with one degree of freedom.
    p = chi2(1).sf(log_rank_stat)
    return table, log_rank_stat, var, p
def mantel_test(t_min_1, t_max_1, t_min_2, t_max_2):
    """
    Performs a Mantel test to evaluate the null hypothesis that two groups
    have the same reliability from interval-censored failure data.

    :param t_min_1: Exclusive lower bounds of the failure intervals for the
        observations from the group 1 failure data.
    :param t_max_1: Inclusive upper bounds of the failure intervals for the
        observations from the group 1 failure data.
    :param t_min_2: Exclusive lower bounds of the failure intervals for the
        observations from the group 2 failure data.
    :param t_max_2: Inclusive upper bounds of the failure intervals for the
        observations from the group 2 failure data.
    :return: A tuple ``(table, w, var, p)`` containing a Pandas DataFrame
        (indexed by observation number, starting at 1, group 1 first) with
        the interval bounds and the 'later', 'earlier' and 'v' scores of each
        observation, the Mantel test statistic (sum of the group 1 scores),
        the estimated variance in the test statistic and the two-sided
        P-value for the test.
    :raises ValueError: If the lower and upper bounds of a group differ in
        size.
    """
    # Convert inputs to pd.Series if not already.
    t_min_1 = convert_to_pd_series(t_min_1)
    t_max_1 = convert_to_pd_series(t_max_1)
    t_min_2 = convert_to_pd_series(t_min_2)
    t_max_2 = convert_to_pd_series(t_max_2)

    if t_min_1.size != t_max_1.size or t_min_2.size != t_max_2.size:
        raise ValueError(
            "t_min and t_max must be equal size within each group.")

    # Pool both groups, group 1 first, and work on positional arrays.
    lower = pd.concat([t_min_1, t_min_2], ignore_index=True).to_numpy()
    upper = pd.concat([t_max_1, t_max_2], ignore_index=True).to_numpy()

    n_1 = t_min_1.size
    n_2 = t_min_2.size
    n = n_1 + n_2

    # later[i]: observations whose upper bound is at or below the lower bound
    # of observation i. earlier[i]: observations whose lower bound is at or
    # above the upper bound of observation i.
    later = np.array([(lo >= upper).sum() for lo in lower], dtype=float)
    earlier = np.array([(hi <= lower).sum() for hi in upper], dtype=float)
    v = later - earlier

    # Plain arrays are used for every column: Series indexed 0..n-1 would be
    # re-aligned against the 1..n index below, shifting the bounds by one row
    # and leaving NaN in the last row.
    table = pd.DataFrame(
        {
            't_min': lower,
            't_max': upper,
            'later': later,
            'earlier': earlier,
            'v': v
        },
        index=range(1, n + 1))
    table.index.name = "Observation #"

    var = n_1 * n_2 * sum(np.power(v, 2)) / ((n_1 + n_2) * (n_1 + n_2 - 1))
    sd = np.sqrt(var)

    # Test statistic: total score of the group 1 observations.
    w = sum(v[:n_1])

    # Two-sided tail probability of a zero-mean normal with scale sd.
    p = norm.sf(abs(w), scale=sd) * 2
    return table, w, var, p
def plot_interval_censored(tmin, tmax, ax=None, show_legend=True):
    """
    Returns a plot of observations from interval-censored failure data.

    Each observation is drawn on its own row (row numbers start at 1): a
    circle when both bounds are equal (exact failure), a right-pointing
    triangle at the lower bound when the upper bound is infinite
    (right-censored) and a horizontal line between the bounds otherwise.

    :param tmin: The exclusive lower bounds of the failure time intervals.
    :param tmax: The inclusive upper bound of the failure time intervals
        (use np.inf for right-censored observations).
    :param ax: Matplotlib axes on which to plot, if None then one will be
        created on a new figure.
    :param show_legend: Boolean that is True if legend should be added to
        axes and False otherwise.
    :return: Matplotlib axes containing the plot.
    """
    tmin = convert_to_pd_series(tmin)
    tmax = convert_to_pd_series(tmax)

    if ax is None:
        fig = plt.figure()  # Create plot figure.
        ax = fig.add_subplot()  # Create the axes to plot on.

    # Add the observations to the axes.
    for i in range(len(tmin)):
        lower = tmin.iloc[i]
        upper = tmax.iloc[i]
        row = i + 1
        if lower == upper:
            # Exact failure observation.
            ax.scatter(lower, row, color='b', marker='o')
        elif upper == np.inf:
            # Right-censored observation. np.inf is used because the
            # np.infty alias no longer exists in NumPy 2.
            ax.scatter(lower, row, color='b', marker='>')
        else:
            # Interval-censored observation.
            ax.hlines(row, lower, upper, color='b')

    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_ylabel('Observation #')
    ax.set_xlabel('Time')
    ax.set_xlim(0)
    ax.set_ylim(0)

    # Add legend to axes.
    if show_legend:
        legend_elements = [
            Line2D([0], [0], color='b', label='Interval-censored'),
            Line2D([0], [0],
                   color='w',
                   markeredgecolor='b',
                   markerfacecolor='b',
                   marker='>',
                   label='Right-censored'),
            Line2D([0], [0],
                   color='w',
                   markeredgecolor='b',
                   markerfacecolor='b',
                   marker='o',
                   label='Exact')
        ]
        ax.legend(handles=legend_elements, loc='upper right')

    return ax
def plot_right_censored(t, d, ax=None, show_legend=True):
    """
    Draw the observations of right-censored failure data on a plot.

    Observation number ``k`` (counted from 1) is drawn at height ``k``: a
    circle for an observed failure and a right-pointing triangle for a
    right-censored observation.

    :param t: Survival times for each observation.
    :param d: Indicator variable value for each observation, where value 1
        indicates exact failure observed and 0 indicates failure was
        right-censored.
    :param ax: Matplotlib axes on which to plot, if None then the current
        axes (``plt.gca()``) are used.
    :param show_legend: Boolean that is True if legend should be added to
        axes and False otherwise.
    :return: Matplotlib axes containing the plot.
    """
    t = convert_to_pd_series(t)
    d = convert_to_pd_series(d)
    ax = plt.gca() if ax is None else ax

    # One marker per observation; the marker shape encodes the indicator.
    for position in range(len(t)):
        symbol = 'o' if d.iloc[position] else '>'
        ax.scatter(t.iloc[position], position + 1, color='b', marker=symbol)

    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_ylabel('Observation #')

    # Legend entries are marker-only handles (white line, blue marker).
    if show_legend:
        entries = (('>', 'Right-censored'), ('o', 'Exact'))
        legend_elements = [
            Line2D([0], [0],
                   color='w',
                   markeredgecolor='b',
                   markerfacecolor='b',
                   marker=symbol,
                   label=text)
            for symbol, text in entries
        ]
        ax.legend(handles=legend_elements)

    return ax
def kaplan_meier_fit(t, d, ci=None, alpha=0.05):
    """
    Produces a table from right-censored failure data containing data
    relating to the Kaplan-Meier estimate of the reliability function.

    :param t: Survival times of the observations in the failure data.
    :param d: Indicator variable values showing if observations were
        failures (value 1) or right-censored (value 0).
    :param ci: string with value 'gw' for Greenwood's and 'egw' for
        exponential Greenwood's confidence interval bounds, or None (the
        default) for no confidence interval.
    :param alpha: float giving level of significance for confidence interval
        (i.e. giving (1-alpha)*100% confidence level it contains true
        reliability). Default is 0.05.
    :return: Pandas DataFrame with one row per ordered failure time (time 0
        is always included) and columns 't', 'm', 'q', 'n' and 'R'
        containing the ordered failure times and corresponding number of
        observed failures, number right-censored from that time up to the
        next ordered failure time, number at risk and Kaplan-Meier
        reliability estimates respectively. When ``ci`` is given, columns
        'CI Lower' and 'CI Upper' are added.
    :raises ValueError: If ``t`` and ``d`` differ in size or ``ci`` is not
        one of None, 'gw' or 'egw'.
    """
    t = convert_to_pd_series(t)
    d = convert_to_pd_series(d)

    if t.size != d.size:
        raise ValueError("times and observed must be equal size.")

    # Get the ordered failure times, with time 0 added as the starting point.
    # pd.concat is used because Series.append no longer exists in pandas 2.
    only_failures = t[d == 1]
    failures_with_zero = pd.concat([only_failures, pd.Series([0])],
                                   ignore_index=True)
    unique_failures = pd.Series(failures_with_zero.unique())
    tf = unique_failures.sort_values(ignore_index=True)

    # Get the number of failures observed at the ordered failure times.
    mf = tf.apply(lambda x: (only_failures == x).sum())

    # Get the number right-censored between each ordered failure time and
    # the next one. total_q holds, for each row, the count censored before
    # the NEXT ordered failure time (hence the shift); differencing it gives
    # the count per interval.
    only_censored = t[d == 0]
    total_q = tf.apply(lambda x: (only_censored < x).sum()).shift(periods=-1)
    qf = total_q.diff()
    qf.iloc[0] = total_q.iloc[0]  # Set censored between t_(0) and t_(1).
    # Set censored at or after the final ordered failure time.
    qf.iloc[-1] = (only_censored >= tf.iloc[-1]).sum()

    # Get the number at risk to fail at the ordered failure times.
    nf = tf.apply(lambda x: (t >= x).sum())

    # Calculate conditional survival probability
    # at the ordered failure times.
    surv_probs = 1 - (mf / nf)

    # Calculate Kaplan-Meier reliability estimates at the
    # ordered failure times.
    r = surv_probs.cumprod()

    # Create Pandas DataFrame with results.
    km_table = pd.DataFrame({'t': tf, 'm': mf, 'q': qf, 'n': nf, 'R': r})
    km_table.index.name = "f"

    # Add confidence intervals.
    if ci == 'gw':
        km_table['CI Lower'], km_table['CI Upper'] = _greenwoods_ci(
            mf, nf, r, alpha)
    elif ci == 'egw':
        km_table['CI Lower'], km_table['CI Upper'] = _exp_greenwoods_ci(
            mf, nf, r, alpha)
    elif ci is not None:
        raise ValueError(
            "Invalid ci value, must be 'gw' for Greenwood's or 'egw' for exponential Greenwood's"
            " confidence interval.")

    return km_table
def turnbull_fit(tmin, tmax, tol=0.001):
    """
    Computes Turnbull estimates of reliability from interval censored
    failure data.

    Parameters
    ----------
    tmin: Lower bounds of failure times for observations.
    tmax: Upper bounds of failure times for observations. An observation
        whose lower and upper bounds are equal is treated as an exact
        failure.
    tol: Tolerance for the iterative procedure, iteration terminates when
        the maximum difference from the previous reliability estimate at any
        time is no greater than the tolerance.

    Returns
    -------
    Pandas DataFrame (index starting at 1) with column 't' containing the
    interval end point time values (time 0 plus every distinct bound, in
    ascending order) and 'R' containing the corresponding Turnbull
    reliability estimates.

    Raises
    ------
    ValueError: If tmin and tmax differ in size.
    """
    tmin = convert_to_pd_series(tmin)
    tmax = convert_to_pd_series(tmax)

    if tmin.size != tmax.size:
        raise ValueError("t_min and t_max must be equal size.")

    # Time grid: 0 plus every distinct bound, ascending. pd.concat is used
    # because Series.append no longer exists in pandas 2.
    t = np.sort(pd.concat([pd.Series([0]), tmin, tmax]).unique())

    # Initial reliability function as equal reduction at each time grid point.
    reliabilities = np.linspace(1.0, 0, len(t))

    # Form alphas matrix describing if each observation (matrix rows)
    # could fail (True) or not (False) in each grid interval (matrix
    # columns). Interval j spans t[j] to t[j + 1].
    m = len(t) - 1
    lower = tmin.to_numpy()[:, np.newaxis]
    upper = tmax.to_numpy()[:, np.newaxis]
    grid_lower = t[:-1]
    grid_upper = t[1:]
    # Exact failures (equal bounds) may sit on the interval's upper end
    # point; censored intervals must start strictly before it.
    is_exact = lower == upper
    starts_in_time = np.where(is_exact, lower <= grid_upper,
                              lower < grid_upper)
    alphas = starts_in_time & (upper > grid_lower)

    # NOTE(review): if the estimates ever become NaN the comparison with tol
    # below is never true and the loop does not terminate - confirm whether
    # inputs can trigger this.
    while True:
        # Compute estimated failures in each interval: every observation's
        # interval failure probabilities are normalised to sum to one over
        # the intervals it could have failed in, then summed per interval.
        p = -np.diff(reliabilities)  # Interval failure probabilities.
        p_alphas = alphas * p
        d = (p_alphas / p_alphas.sum(axis=1, keepdims=True)).sum(axis=0)

        # Compute number at risk in each interval (failures estimated in
        # this interval or any later one).
        y = np.cumsum(d[::-1])[::-1]

        # Product-limit update, with reliability 1 at the first grid point.
        updated_reliabilities = np.insert(np.cumprod(1 - (d / y)), 0, 1)
        difference = np.max(np.abs(updated_reliabilities - reliabilities))
        reliabilities = updated_reliabilities
        if difference <= tol:
            break

    turnbull_table = pd.DataFrame({
        't': t,
        'R': reliabilities
    },
                                  index=range(1, m + 2))
    return turnbull_table