def test_qq_plot_right_censoring_with_known_distribution(self, block):
    """Smoke-test ``qq_plot`` on right-censored samples from a known law.

    True lifetimes and censoring times are both drawn from
    ``scipy.stats.fisk(8, 0, 1)``. A subject counts as observed when its
    lifetime falls strictly before its censoring time, and the recorded
    duration is the smaller of the two. Four parametric fitters are each
    fitted and drawn into their own panel of a 2x2 grid; the test passes
    when ``qq_plot`` hands back a non-``None`` value for every one of them.

    ``block`` is forwarded unchanged to ``self.plt.show``.
    """
    sample_size = 3000
    lifetimes = scipy.stats.fisk(8, 0, 1).rvs(sample_size)
    censor_times = scipy.stats.fisk(8, 0, 1).rvs(sample_size)
    observed = lifetimes < censor_times
    durations = np.minimum(lifetimes, censor_times)

    _, grid = self.plt.subplots(2, 2, figsize=(9, 5))
    panels = grid.reshape(4)
    fitters = [
        WeibullFitter(),
        LogNormalFitter(),
        LogLogisticFitter(),
        ExponentialFitter(),
    ]
    for panel, fitter in zip(panels, fitters):
        fitter.fit(durations, observed)
        drawn = qq_plot(fitter, ax=panel)
        assert drawn is not None

    self.plt.suptitle("test_qq_plot_right_censoring_with_known_distribution")
    self.plt.show(block=block)
def test_qq_plot_left_censoring_with_known_distribution(self, block):
    """Smoke-test ``qq_plot`` on left-censored samples from a known law.

    Lifetimes are drawn from ``scipy.stats.fisk(8, 0, 1)``. Each row is
    randomly assigned to one of three buckets: bucket 0 is floored at the
    5th percentile of the sample, bucket 1 at the 10th percentile, and
    bucket 2 is left untouched. A row counts as observed only if flooring
    did not change its value. Four parametric fitters are fitted with
    ``fit_left_censoring`` and drawn into a 2x2 grid; the test passes when
    ``qq_plot`` returns a non-``None`` value for each.

    ``block`` is forwarded unchanged to ``self.plt.show``.
    """
    sample_size = 300
    lifetimes = scipy.stats.fisk(8, 0, 1).rvs(sample_size)
    floor_p5 = np.percentile(lifetimes, 5)
    floor_p10 = np.percentile(lifetimes, 10)

    bucket = np.random.randint(3, size=sample_size)
    recorded = np.where(bucket == 0, np.maximum(lifetimes, floor_p5), lifetimes)
    recorded = np.where(bucket == 1, np.maximum(recorded, floor_p10), recorded)
    observed = lifetimes == recorded

    _, grid = self.plt.subplots(2, 2, figsize=(9, 5))
    panels = grid.reshape(4)
    fitters = [
        WeibullFitter(),
        LogNormalFitter(),
        LogLogisticFitter(),
        ExponentialFitter(),
    ]
    for panel, fitter in zip(panels, fitters):
        fitter.fit_left_censoring(recorded, observed)
        drawn = qq_plot(fitter, ax=panel)
        assert drawn is not None

    self.plt.suptitle("test_qq_plot_left_censoring_with_known_distribution")
    self.plt.show(block=block)
# Two further censoring floors (30th and 50th percentile of the true sample);
# MIN_0 and MIN_1 are expected to be defined earlier in the script.
MIN_2 = np.percentile(T_actual, 30)
MIN_3 = np.percentile(T_actual, 50)

# Assign every row to one of four buckets and raise values below the bucket's
# floor up to that floor, which is what makes those rows left-censored.
T = T_actual.copy()
ix = np.random.randint(4, size=N)
T = np.where(ix == 0, np.maximum(T, MIN_0), T)
T = np.where(ix == 1, np.maximum(T, MIN_1), T)
T = np.where(ix == 2, np.maximum(T, MIN_2), T)
T = np.where(ix == 3, np.maximum(T, MIN_3), T)
# A row is "observed" only if flooring left its value unchanged.
E = T_actual == T

fig, axes = plt.subplots(2, 2, figsize=(9, 5))
axes = axes.reshape(4)

# One cumulative-density panel per model. The original branched on
# isinstance(model, KaplanMeierFitter) but both branches made the same call,
# so the conditional has been removed.
# NOTE(review): newer lifelines releases expose `fit_left_censoring` in place
# of the `left_censorship=True` keyword — confirm against the installed version.
for i, model in enumerate([WeibullFitter(), KaplanMeierFitter(), LogNormalFitter(), LogLogisticFitter()]):
    model.fit(T, E, left_censorship=True, label=model.__class__.__name__)
    model.plot_cumulative_density(ax=axes[i])
plt.tight_layout()

# For each parametric model, open a fresh two-row figure: CDF plot on top,
# QQ plot underneath. `fig` and `axes` are rebound on every iteration.
for i, model in enumerate([WeibullFitter(), LogNormalFitter(), LogLogisticFitter()]):
    model.fit(T, E, left_censorship=True)
    fig, axes = plt.subplots(2, 1, figsize=(8, 6))
    left_censorship_cdf_plot(model, ax=axes[0])
    qq_plot(model, ax=axes[1])
model = model # instantiate the class to create an object for the input model # Two Cohorts are compared. 1. Streaming TV Not Subsribed by Users, 2. Streaming TV subscribed by the users. groups = data['StreamingTV'] # group i1 , having the pandas series for the 1st cohort i1 = (groups == 'No') # group i2 , having the pandas series for the 2nd cohort i2 = (groups == 'Yes') # fit the model for 1st cohort model.fit(T[i1], E[i1], label='Not Subscribed StreamingTV') a1 = model.plot(ax=axes) # fit the model for 2nd cohort model.fit(T[i2], E[i2], label='Subscribed StreamingTV') model.plot(ax=axes) # Churn by subscribe for the lognormal model churn_by_subscribe(LogNormalFitter(), axes[0][0]) # Churn by subscribe for the weibull model churn_by_subscribe(WeibullFitter(), axes[0][1]) # Churn by subscribe for the loglogistic model churn_by_subscribe(LogLogisticFitter(), axes[1][0]) # Churn by subscribe for the Exponential model churn_by_subscribe(ExponentialFitter(), axes[1][1]) # Function for adding subtitles and labels plot_details('Subscribed', axes[0, 0], axes[0, 1], axes[1, 0], axes[1, 1], fig)
# -*- coding: utf-8 -*-
# Timing script: fit LogNormalFitter to a large synthetic right-censored sample
# and print how long the fit took, followed by the fitted summary.

if __name__ == "__main__":
    import pandas as pd
    import numpy as np
    import time

    from lifelines import LogNormalFitter

    # Fixed seed so every run times the fit on the same data.
    np.random.seed(1)
    N = 250000
    mu = 3 * np.random.randn()
    sigma = np.random.uniform(0.1, 3.0)

    # X: log-normal lifetimes with the drawn (mu, sigma).
    # C: log-normal censoring times sharing mu, with unit sigma.
    X, C = np.exp(sigma * np.random.randn(N) + mu), np.exp(np.random.randn(N) + mu)
    E = X <= C            # event observed when the lifetime does not exceed the censoring time
    T = np.minimum(X, C)  # recorded duration

    lnf = LogNormalFitter()
    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    lnf.fit(T, E)
    print("--- %s seconds ---" % (time.perf_counter() - start_time))
    lnf.print_summary(5)
def churn_by_gender(model, axes):
    """Fit ``model`` to the male and female cohorts and overlay both fits.

    Reads the module-level ``data``, ``T`` and ``E``. The same ``model``
    instance is fitted twice, first on rows where ``data['gender']`` is
    ``'Male'`` and then on rows where it is ``'Female'``, and plotted after
    each fit. When the function returns, ``model`` holds the ``'Female'`` fit.

    Parameters
    ----------
    model
        Fitter object exposing ``fit(durations, events, label=...)`` and
        ``plot(ax=...)``.
    axes
        Passed straight through as ``ax=`` to ``model.plot``, so both
        cohorts are drawn on the same target.
    """
    groups = data['gender']
    # Build both masks up front, then fit and plot each cohort in turn.
    cohorts = (
        ('Male', groups == 'Male'),
        ('Female', groups == 'Female'),
    )
    for label, mask in cohorts:
        model.fit(T[mask], E[mask], label=label)
        model.plot(ax=axes)


# One panel of the 2x2 grid per parametric family.
churn_by_gender(LogNormalFitter(), axes[0][0])
churn_by_gender(WeibullFitter(), axes[0][1])
churn_by_gender(LogLogisticFitter(), axes[1][0])
churn_by_gender(ExponentialFitter(), axes[1][1])

# plot_details is defined elsewhere; per the original note it adds the
# subtitles and labels to the four panels and the figure.
plot_details('Gender', axes[0, 0], axes[0, 1], axes[1, 0], axes[1, 1], fig)
import pandas as pd

# Load the Telco customer table and make sure tenure is numeric.
data = pd.read_csv('Dataset/telco_customer.csv')
data['tenure'] = pd.to_numeric(data['tenure'])
# Keep only rows with positive tenure (presumably because the parametric
# fitters need strictly positive durations — confirm). The explicit .copy()
# makes the filtered frame independent, so the column assignment below does
# not raise pandas' SettingWithCopyWarning.
data = data[data['tenure'] > 0].copy()
# Encode Churn as 1 for the event ('Yes') and 0 for censored rows (anything else).
data['Churn'] = (data['Churn'] == 'Yes').astype(int)

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
T = data['tenure']
E = data['Churn']

# Fit one model per parametric family on the full dataset.
wbf = WeibullFitter().fit(T, E, label='WeibullFitter')
ef = ExponentialFitter().fit(T, E, label='ExponentialFitter')
lnf = LogNormalFitter().fit(T, E, label='LogNormalFitter')
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')

# One cumulative-hazard curve per panel.
wbf.plot_cumulative_hazard(ax=axes[0][0])
ef.plot_cumulative_hazard(ax=axes[0][1])
lnf.plot_cumulative_hazard(ax=axes[1][0])
llf.plot_cumulative_hazard(ax=axes[1][1])

plt.suptitle(
    'Parametric Model Implementation of the Telco dataset using different models'
)
# Shared axis captions for the whole figure. The y caption used to read
# 'Probability', but these panels show cumulative hazard, which is not a
# probability and is not bounded by 1.
fig.text(0.5, 0.04, 'Timeline', ha='center')
fig.text(0.04, 0.5, 'Cumulative hazard', va='center', rotation='vertical')
plt.savefig('Images/WeiExpLogx.jpeg')
plt.show()
model = model # instantiate the class to create an object for choosen model # Three cohorts are compared on the basis of the contract groups = data['Contract'] x1 = (groups == 'Month-to-month') x2 = (groups == 'Two year') x3 = (groups == 'One year') model.fit(T[x1], E[x1], label='Month-to-month') ax = model.plot(ax=axes) model.fit(T[x2], E[x2], label='Two year') ax1 = model.plot(ax=axes) ac1 = model.plot model.fit(T[x3], E[x3], label='One year') model.plot(ax=axes) # Churn by contract for the lognormal model churn_by_contract(LogNormalFitter(), axes[0][0]) # Churn by contract for the weibull model churn_by_contract(WeibullFitter(), axes[0][1]) # Churn by contract for the loglogistic model churn_by_contract(LogLogisticFitter(), axes[1][0]) # Churn by contract for the Exponential model churn_by_contract(ExponentialFitter(), axes[1][1]) # Function for adding subtitles and labels plot_details('Contract', axes[0, 0], axes[0, 1], axes[1, 0], axes[1, 1], fig)
general_dist.set("rho", str(wf.rho_)) general_dist.set("mean", str(weib.mean_time())) plot_hist(uptime, obs_up, 99, weib) plt.title(f'Probability of failure in time [general]'#, reasons: ' + ', '.join([str(x) for x in reasons_relative]) ) plt.savefig(rf'./{file_used.split(".")[0]}/figures/fail_prob_{file_used.split(".")[0]}_general.pdf', dpi=2400, layout='tight') plt.savefig(rf'./{file_used.split(".")[0]}/figures/fail_prob_{file_used.split(".")[0]}_general.png', dpi=2400, layout='tight') plt.close() # GENERATE REPAIR TIME DEFINITION # Generate a lognormal distribution for the repair time print('Repair time') print(len('Repair time') * '-') lnf = LogNormalFitter() try: lnf.fit(downtime, obs_down) logn = Lognormal(lnf.sigma_, lnf.mu_) except: raise if print_all: print(logn) if export_all: repair_dist = ET.SubElement(root, 'repair_dist') repair_dist.text = 'lognormal' repair_dist.set("sigma", str(lnf.sigma_)) repair_dist.set("mu", str(lnf.mu_)) repair_dist.set("mean", str(logn.mean_time())) plot_hist(downtime, obs_down, 99, logn) plt.title(f"Probability of repair in time"#, reasons: {', '.join([str(x) for x in reasons_relative])}"
import pandas as pd

# Load the Telco customer table and make sure tenure is numeric.
data = pd.read_csv('Dataset/telco_customer.csv')
data['tenure'] = pd.to_numeric(data['tenure'])
# Keep only rows with positive tenure (presumably because the parametric
# fitters need strictly positive durations — confirm). The explicit .copy()
# makes the filtered frame independent, so the column assignment below does
# not raise pandas' SettingWithCopyWarning.
data = data[data['tenure'] > 0].copy()
# Encode Churn as 1 for the event ('Yes') and 0 for censored rows (anything else).
data['Churn'] = (data['Churn'] == 'Yes').astype(int)

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
T = data['tenure']
E = data['Churn']

# Fit one model per parametric family on the full dataset.
wbf = WeibullFitter().fit(T, E, label='WeibullFitter')
ef = ExponentialFitter().fit(T, E, label='ExponentialFitter')
lnf = LogNormalFitter().fit(T, E, label='LogNormalFitter')
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')

# One survival curve per panel.
wbf.plot_survival_function(ax=axes[0][0])
ef.plot_survival_function(ax=axes[0][1])
lnf.plot_survival_function(ax=axes[1][0])
llf.plot_survival_function(ax=axes[1][1])

# NOTE(review): the title text contains typos ("Paramteric", "teleco"); it is
# kept verbatim here because it is runtime output — fix deliberately if wanted.
plt.suptitle(
    'Implementation of Paramteric Models to create survival functions on the teleco dataset'
)
# Shared axis captions for the whole figure.
fig.text(0.5, 0.04, 'Timeline', ha='center')
fig.text(0.04, 0.5, 'Probability', va='center', rotation='vertical')
plt.savefig('Images/SurvivalFunctions.jpeg')
plt.show()
# Assign every row to one of four buckets and raise values below the bucket's
# floor (MIN_0..MIN_3, expected to be defined earlier in the script) up to that
# floor, which is what makes those rows left-censored.
T = T_actual.copy()
ix = np.random.randint(4, size=N)
T = np.where(ix == 0, np.maximum(T, MIN_0), T)
T = np.where(ix == 1, np.maximum(T, MIN_1), T)
T = np.where(ix == 2, np.maximum(T, MIN_2), T)
T = np.where(ix == 3, np.maximum(T, MIN_3), T)
# A row is "observed" only if flooring left its value unchanged.
E = T_actual == T

fig, axes = plt.subplots(2, 2, figsize=(9, 5))
axes = axes.reshape(4)

# One cumulative-density panel per model. The original branched on
# isinstance(model, KaplanMeierFitter) but both branches made the same call,
# so the conditional has been removed.
for i, model in enumerate([
        WeibullFitter(),
        KaplanMeierFitter(),
        LogNormalFitter(),
        LogLogisticFitter()
]):
    model.fit_left_censoring(T, E, label=model.__class__.__name__)
    model.plot_cumulative_density(ax=axes[i])
plt.tight_layout()

# For each parametric model, refit and open a fresh two-row figure; `fig` and
# `axes` are rebound on every iteration.
for i, model in enumerate(
        [WeibullFitter(), LogNormalFitter(), LogLogisticFitter()]):
    model.fit_left_censoring(T, E)
    fig, axes = plt.subplots(2, 1, figsize=(8, 6))
def churn_by_partner(model, axes):
    """Fit ``model`` to customers without and with a partner and overlay both fits.

    Reads the module-level ``data``, ``T`` and ``E``. The same ``model``
    instance is fitted twice, first on rows where ``data['Partner']`` is
    ``'No'`` and then on rows where it is ``'Yes'``, and plotted after each
    fit. When the function returns, ``model`` holds the ``'Have a partner'``
    fit.

    Parameters
    ----------
    model
        Fitter object exposing ``fit(durations, events, label=...)`` and
        ``plot(ax=...)``.
    axes
        Passed straight through as ``ax=`` to ``model.plot``, so both
        cohorts are drawn on the same target.
    """
    groups = data['Partner']
    # Build both masks up front, then fit and plot each cohort in turn.
    cohorts = (
        ('Do not have a partner', groups == 'No'),
        ('Have a partner', groups == 'Yes'),
    )
    for label, mask in cohorts:
        model.fit(T[mask], E[mask], label=label)
        model.plot(ax=axes)


# One panel of the 2x2 grid per parametric family.
churn_by_partner(LogNormalFitter(), axes[0][0])
churn_by_partner(WeibullFitter(), axes[0][1])
churn_by_partner(LogLogisticFitter(), axes[1][0])
churn_by_partner(ExponentialFitter(), axes[1][1])

# plot_details is defined elsewhere; per the original note it adds the
# subtitles and labels to the four panels and the figure.
plot_details('Partner', axes[0, 0], axes[0, 1], axes[1, 0], axes[1, 1], fig)