Ejemplo n.º 1
0
def fit_weibull(df, x_grid=None):
    """Fit a Weibull model to ``df`` and report its parameters.

    The fitter is trained with ``df["offset"]`` as the first argument of
    ``WeibullFitter.fit`` and ``df["observed"]`` as the second.

    Args:
        df: Object indexable by the keys ``"offset"`` and ``"observed"``.
        x_grid: Optional points at which the fitted density is evaluated.

    Returns:
        When ``x_grid`` is ``None``: a dict with keys ``"scale"`` (the
        fitter's ``lambda_``) and ``"shape"`` (the fitter's ``rho_``).
        Otherwise: the tuple ``(params, pdf)``, where ``pdf`` is the result
        of ``density_at_times(x_grid)`` converted with ``to_numpy()``.
    """
    fitter = WeibullFitter()
    fitter.fit(df["offset"], df["observed"])

    params = dict(scale=fitter.lambda_, shape=fitter.rho_)

    # Without a grid there is nothing to evaluate: hand back parameters only.
    if x_grid is None:
        return params

    density = fitter.density_at_times(x_grid)
    return params, density.to_numpy()
Ejemplo n.º 2
0
# -*- coding: utf-8 -*-
# weibull fitter timing

if __name__ == "__main__":
    import pandas as pd
    import numpy as np
    import time

    from lifelines import WeibullFitter

    # Fixed seed so every run times the fit on the same synthetic data.
    np.random.seed(1)
    N = 250000
    mu = 3 * np.random.randn()
    sigma = np.random.uniform(0.1, 3.0)

    # Log-normal event times X and log-normal censoring times C sharing the
    # same location mu; X uses the random scale sigma, C uses unit scale.
    X, C = np.exp(sigma * np.random.randn(N) +
                  mu), np.exp(np.random.randn(N) + mu)
    # An event is observed when it happens no later than its censoring time.
    E = X <= C
    T = np.minimum(X, C)

    wb = WeibullFitter()
    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    wb.fit(T, E)
    print("--- %s seconds ---" % (time.perf_counter() - start_time))
    wb.print_summary(5)
# -*- coding: utf-8 -*-
import numpy as np
from lifelines import WeibullFitter

lambda_, rho_ = 2, 0.5
N = 10000

# lambda_ * Exp(1) ** (1 / rho_) is a Weibull draw with scale lambda_ and
# shape rho_; event and censoring times come from the same distribution.
T_actual = lambda_ * np.random.exponential(1, size=N) ** (1 / rho_)
T_censor = lambda_ * np.random.exponential(1, size=N) ** (1 / rho_)
T = np.minimum(T_actual, T_censor)
E = T_actual < T_censor

# Single evaluation point for the cumulative hazard.
time = [1.0]

# lifelines computed confidence interval
wf = WeibullFitter()
print(wf.fit(T, E, timeline=time).confidence_interval_cumulative_hazard_)


# Bootstrap comparison: refit on resamples drawn with replacement and look at
# the 2.5 / 97.5 percentiles of the cumulative hazard at `time`.
bootstrap_samples = 10000
results = []

for _ in range(bootstrap_samples):
    # Resample indices over the full data set; tied to N (rather than a
    # repeated literal) so the script stays correct if N is changed.
    ix = np.random.randint(0, N, N)
    wf = WeibullFitter().fit(T[ix], E[ix], timeline=time)
    results.append(wf.cumulative_hazard_at_times(time).values[0])
    # NOTE(review): this prints the running percentile interval on every
    # iteration; confirm whether only the final interval was intended.
    print(np.percentile(results, [2.5, 97.5]))
Ejemplo n.º 4
0
    kmf.plot(ax=ax)

# PURPOSE
# Overlay one fitted curve per distinct PURPOSE value on a single shared axis.
ax = plt.subplot()
purpose_values = df_cox.PURPOSE
for purpose in purpose_values.unique():
    # Boolean mask selecting the rows that belong to the current purpose.
    is_pur = purpose_values == purpose
    kmf.fit(T[is_pur], event_observed=E[is_pur], label=purpose).plot(ax=ax)

############################################################
# WeibullFitter
############################################################
from lifelines import WeibullFitter

# Fit a Weibull model to (T, E), then report it three ways: the raw
# lambda_/rho_ estimates, the summary table, and a plot.
wf = WeibullFitter().fit(T, E)
print(wf.lambda_, wf.rho_)
wf.print_summary()
wf.plot()

############################################################
# NelsonAalenFitter
############################################################
from lifelines import NelsonAalenFitter

# Fit the Nelson-Aalen estimator to (T, E) and plot the result.
naf = NelsonAalenFitter().fit(T, event_observed=E)
naf.plot()

# univariate analysis: cum hazard
    print(len('General Weibull distribution:') * '-')
    # bool_up = (df.Type == 'RunTime')
    # bool_down = ((df.Type == 'DownTime') & (df.ReasonId.isin(reasons_relative)))
    # continue_obs = ((df.Type == 'DownTime') & (df.ReasonId.isin(reasons_absolute + reasons_not_considered + reasons_availability)))
    # stop_obs = (df.Type == 'Break')

    bool_up = (df_task['Type'] == 'RunTime') # List of all RunTimes
    bool_down = (df_task['Type'].isin(['DownTime', 'Break'])) & (df_task['ReasonId'].isin(reasons_relative)) # List of all DownTimes in calculation
    bool_ignore = (df_task['Type'].isin(['DownTime', 'Break'])) & (df_task['ReasonId'].isin(reasons_availability + reasons_absolute)) # List of all breaks to ignore
    bool_break = (df_task['Type'].isin(['DownTime', 'Break'])) & (df_task['ReasonId'].isin(reasons_break)) # List of all breaks to stop observation

    uptime, downtime, obs_up, obs_down = duration_run_down(list(df_task['Duration'] / 3600), list(bool_up), list(bool_down), 
                                                           list(bool_ignore), list(bool_break), observation=True)
    wf = WeibullFitter()
    try:
        wf.fit(uptime, obs_up)
        weib = Weibull(wf.lambda_, wf.rho_)
    except:
        print(uptime)
        raise
    if print_all:
        print(weib)
    if export_all:
        general_dist = ET.SubElement(root, 'general_dist')
        general_dist.text = 'weibull'
        general_dist.set("lambda", str(wf.lambda_))
        general_dist.set("rho", str(wf.rho_))
        general_dist.set("mean", str(weib.mean_time()))
        plot_hist(uptime, obs_up, 99, weib)
        plt.title(f'Probability of failure in time [general]'#, reasons: ' +  ', '.join([str(x) for x in reasons_relative])
                  )