Exemple #1
0
    def test_qq_plot_left_censoring_with_known_distribution(self, block):
        """Smoke-test ``qq_plot`` on synthetic left-censored data.

        Durations are sampled from ``scipy.stats.fisk(8, 0, 1)``. Every sample
        is randomly placed in one of three groups: group 0 is floored at the
        5th percentile, group 1 at the 10th percentile, and group 2 is left
        untouched. A sample counts as observed only when flooring did not
        change its value. Each of the four parametric fitters is then fitted
        with ``fit_left_censoring`` and drawn on its own panel.

        ``block`` is forwarded to ``self.plt.show``.
        """
        n_samples = 300
        true_times = scipy.stats.fisk(8, 0, 1).rvs(n_samples)

        # Lower limits: a value below its group's limit is replaced by the limit.
        limit_low = np.percentile(true_times, 5)
        limit_high = np.percentile(true_times, 10)

        group = np.random.randint(3, size=n_samples)
        durations = true_times.copy()
        for group_id, limit in ((0, limit_low), (1, limit_high)):
            durations = np.where(
                group == group_id, np.maximum(durations, limit), durations
            )
        # Unchanged values are the ones that were actually observed.
        observed = true_times == durations

        _, grid = self.plt.subplots(2, 2, figsize=(9, 5))
        panels = grid.reshape(4)
        fitters = (
            WeibullFitter(),
            LogNormalFitter(),
            LogLogisticFitter(),
            ExponentialFitter(),
        )
        for panel, fitter in zip(panels, fitters):
            fitter.fit_left_censoring(durations, observed)
            drawn = qq_plot(fitter, ax=panel)
            assert drawn is not None

        self.plt.suptitle("test_qq_plot_left_censoring_with_known_distribution")
        self.plt.show(block=block)
Exemple #2
0
 def test_qq_plot_left_censoring2(self, block):
     """Smoke-test ``qq_plot`` on the ``load_lcd`` dataset.

     Each of the four parametric fitters is fitted with
     ``fit_left_censoring`` on the ``T`` / ``E`` columns and drawn on its
     own panel of a 2x2 figure. ``block`` is forwarded to ``self.plt.show``.
     """
     lcd = load_lcd()
     _, grid = self.plt.subplots(2, 2, figsize=(9, 5))
     panels = grid.reshape(4)
     fitters = (
         WeibullFitter(),
         LogNormalFitter(),
         LogLogisticFitter(),
         ExponentialFitter(),
     )
     for panel, fitter in zip(panels, fitters):
         fitter.fit_left_censoring(lcd["T"], lcd["E"])
         drawn = qq_plot(fitter, ax=panel)
         assert drawn is not None
     self.plt.suptitle("test_qq_plot_left_censoring2")
     self.plt.show(block=block)
Exemple #3
0
 def test_right_censorship_cdf_plots(self, block):
     """Smoke-test ``cdf_plot`` on the ``load_rossi`` dataset.

     Each parametric fitter is fitted with ``week`` as the duration and
     ``arrest`` as the event flag, then drawn on its own panel of a 2x2
     figure. ``block`` is forwarded to ``self.plt.show``.
     """
     rossi = load_rossi()
     _, grid = self.plt.subplots(2, 2, figsize=(9, 5))
     panels = grid.reshape(4)
     fitters = [
         WeibullFitter(),
         LogNormalFitter(),
         LogLogisticFitter(),
         ExponentialFitter(),
     ]
     for position in range(len(fitters)):
         fitter = fitters[position]
         fitter.fit(rossi["week"], rossi["arrest"])
         assert cdf_plot(fitter, ax=panels[position]) is not None
     self.plt.suptitle("test_right_censorship_cdf_plots")
     self.plt.show(block=block)
Exemple #4
0
 def test_left_censorship_cdf_plots(self, block):
     """Smoke-test ``cdf_plot`` on the ``load_nh4`` dataset.

     Durations come from the ``NH4.mg.per.L`` column; the observed flag is
     the negation of the ``Censored`` column. Each parametric fitter is
     fitted with ``fit_left_censoring`` and drawn on its own panel.
     ``block`` is forwarded to ``self.plt.show``.
     """
     nh4 = load_nh4()
     _, grid = self.plt.subplots(2, 2, figsize=(9, 5))
     panels = grid.reshape(4)
     fitters = (
         WeibullFitter(),
         LogNormalFitter(),
         LogLogisticFitter(),
         ExponentialFitter(),
     )
     for panel, fitter in zip(panels, fitters):
         # "Censored" marks censored rows, so invert it to get the observed mask.
         fitter.fit_left_censoring(nh4["NH4.mg.per.L"], ~nh4["Censored"])
         drawn = cdf_plot(fitter, ax=panel)
         assert drawn is not None
     self.plt.suptitle("test_left_censorship_cdf_plots")
     self.plt.show(block=block)
Exemple #5
0
    def test_qq_plot_right_censoring_with_known_distribution(self, block):
        """Smoke-test ``qq_plot`` on synthetic right-censored data.

        Event times and censoring times are both sampled from
        ``scipy.stats.fisk(8, 0, 1)``. The recorded duration is the smaller
        of the two, and an event counts as observed when it happens strictly
        before its censoring time. ``block`` is forwarded to ``self.plt.show``.
        """
        n_samples = 3000
        event_times = scipy.stats.fisk(8, 0, 1).rvs(n_samples)
        censor_times = scipy.stats.fisk(8, 0, 1).rvs(n_samples)
        observed = event_times < censor_times
        durations = np.minimum(event_times, censor_times)

        _, grid = self.plt.subplots(2, 2, figsize=(9, 5))
        panels = grid.reshape(4)
        fitters = (
            WeibullFitter(),
            LogNormalFitter(),
            LogLogisticFitter(),
            ExponentialFitter(),
        )
        for panel, fitter in zip(panels, fitters):
            fitter.fit(durations, observed)
            drawn = qq_plot(fitter, ax=panel)
            assert drawn is not None
        self.plt.suptitle("test_qq_plot_right_censoring_with_known_distribution")
        self.plt.show(block=block)
# Two further lower limits. MIN_0, MIN_1, T_actual and N are expected to be
# defined before this point (that part of the script is not shown here).
MIN_2 = np.percentile(T_actual, 30)
MIN_3 = np.percentile(T_actual, 50)

T = T_actual.copy()
ix = np.random.randint(4, size=N)

# Samples in group k are floored at the k-th limit; a value that was raised
# to its limit is treated as left-censored.
for group_id, lower_limit in enumerate((MIN_0, MIN_1, MIN_2, MIN_3)):
    T = np.where(ix == group_id, np.maximum(T, lower_limit), T)
E = T_actual == T

fig, axes = plt.subplots(2, 2, figsize=(9, 5))
axes = axes.reshape(4)

# One cumulative-density panel per estimator, labelled with its class name.
# Every estimator, KaplanMeierFitter included, is fitted with the same call.
for i, model in enumerate(
    [WeibullFitter(), KaplanMeierFitter(), LogNormalFitter(), LogLogisticFitter()]
):
    model.fit(T, E, left_censorship=True, label=model.__class__.__name__)
    model.plot_cumulative_density(ax=axes[i])
plt.tight_layout()

# A separate two-panel figure per parametric model: CDF plot on top, QQ plot below.
for i, model in enumerate(
    [WeibullFitter(), LogNormalFitter(), LogLogisticFitter()]
):
    model.fit(T, E, left_censorship=True)
    fig, axes = plt.subplots(2, 1, figsize=(8, 6))

    left_censorship_cdf_plot(model, ax=axes[0])
    qq_plot(model, ax=axes[1])
Exemple #7
0
# Load the Telco customer data and keep only rows with a positive tenure.
data = pd.read_csv('Dataset/telco_customer.csv')
data['tenure'] = pd.to_numeric(data['tenure'])
# .copy() makes the filtered frame independent of the original, so the column
# assignment below is not a chained assignment on a slice (which triggers
# pandas' SettingWithCopyWarning).
data = data[data['tenure'] > 0].copy()

# Encode Churn as the event indicator: 1 for 'Yes' (event), 0 for anything
# else (censored). Vectorized comparison instead of a per-row lambda.
data['Churn'] = data['Churn'].eq('Yes').astype('int64')
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

T = data['tenure']
E = data['Churn']

# Fit each parametric family on the full data set.
wbf = WeibullFitter().fit(T, E, label='WeibullFitter')
ef = ExponentialFitter().fit(T, E, label='ExponentialFitter')
lnf = LogNormalFitter().fit(T, E, label='LogNormalFitter')
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')

# One cumulative-hazard curve per panel of the 2x2 grid.
wbf.plot_cumulative_hazard(ax=axes[0][0])
ef.plot_cumulative_hazard(ax=axes[0][1])
lnf.plot_cumulative_hazard(ax=axes[1][0])
llf.plot_cumulative_hazard(ax=axes[1][1])

plt.suptitle(
    'Parametric Model Implementation of the Telco dataset using different models'
)

# Shared axis captions for the whole figure.
# NOTE(review): the panels show cumulative hazard, so the 'Probability'
# caption looks inaccurate -- confirm the intended label before changing it.
fig.text(0.5, 0.04, 'Timeline', ha='center')
fig.text(0.04, 0.5, 'Probability', va='center', rotation='vertical')
plt.savefig('Images/WeiExpLogx.jpeg')
plt.show()
Exemple #8
0
    model = model  # instantiate the class to create an object for the input model

    # Two cohorts are compared: (1) users not subscribed to Streaming TV, (2) users subscribed to Streaming TV.
    groups = data['StreamingTV']
    # group i1 , having the pandas series for the 1st cohort
    i1 = (groups == 'No')
    # group i2 , having the pandas series for the 2nd cohort
    i2 = (groups == 'Yes')

    # fit the model for 1st cohort
    model.fit(T[i1], E[i1], label='Not Subscribed StreamingTV')
    a1 = model.plot(ax=axes)

    # fit the model for 2nd cohort
    model.fit(T[i2], E[i2], label='Subscribed StreamingTV')
    model.plot(ax=axes)


# Churn by subscription, one panel per parametric family:
# log-normal (top-left), Weibull (top-right),
# log-logistic (bottom-left), exponential (bottom-right).
for fitter_cls, (row, col) in (
    (LogNormalFitter, (0, 0)),
    (WeibullFitter, (0, 1)),
    (LogLogisticFitter, (1, 0)),
    (ExponentialFitter, (1, 1)),
):
    churn_by_subscribe(fitter_cls(), axes[row][col])

# Add the subtitles and labels for the four panels.
plot_details(
    'Subscribed',
    axes[0, 0],
    axes[0, 1],
    axes[1, 0],
    axes[1, 1],
    fig,
)
def churn_by_gender(model, axes):
    """Fit ``model`` to the male and female cohorts in turn and plot both on ``axes``.

    Reads the module-level ``data``, ``T`` and ``E``. The same ``model``
    object is refitted for each cohort, so after the call it holds the fit
    for the 'Female' cohort.
    """
    gender = data['gender']
    # (label, boolean row mask) for each cohort, in plotting order.
    cohorts = (
        ('Male', gender == 'Male'),
        ('Female', gender == 'Female'),
    )
    for label, mask in cohorts:
        model.fit(T[mask], E[mask], label=label)
        model.plot(ax=axes)


# Churn by gender, one panel per parametric family:
# log-normal (top-left), Weibull (top-right),
# log-logistic (bottom-left), exponential (bottom-right).
for fitter_cls, (row, col) in (
    (LogNormalFitter, (0, 0)),
    (WeibullFitter, (0, 1)),
    (LogLogisticFitter, (1, 0)),
    (ExponentialFitter, (1, 1)),
):
    churn_by_gender(fitter_cls(), axes[row][col])

# Add the subtitles and labels for the four panels.
plot_details(
    'Gender',
    axes[0, 0],
    axes[0, 1],
    axes[1, 0],
    axes[1, 1],
    fig,
)
Exemple #10
0
    model = model  # instantiate the class to create an object for choosen model

    # Three cohorts are compared on the basis of the contract
    groups = data['Contract']
    x1 = (groups == 'Month-to-month')
    x2 = (groups == 'Two year')
    x3 = (groups == 'One year')

    model.fit(T[x1], E[x1], label='Month-to-month')
    ax = model.plot(ax=axes)

    model.fit(T[x2], E[x2], label='Two year')
    ax1 = model.plot(ax=axes)
    ac1 = model.plot

    model.fit(T[x3], E[x3], label='One year')
    model.plot(ax=axes)


# Churn by contract, one panel per parametric family:
# log-normal (top-left), Weibull (top-right),
# log-logistic (bottom-left), exponential (bottom-right).
for fitter_cls, (row, col) in (
    (LogNormalFitter, (0, 0)),
    (WeibullFitter, (0, 1)),
    (LogLogisticFitter, (1, 0)),
    (ExponentialFitter, (1, 1)),
):
    churn_by_contract(fitter_cls(), axes[row][col])

# Add the subtitles and labels for the four panels.
plot_details(
    'Contract',
    axes[0, 0],
    axes[0, 1],
    axes[1, 0],
    axes[1, 1],
    fig,
)
import pandas as pd
# Load the Telco customer data and keep only rows with a positive tenure.
data = pd.read_csv('Dataset/telco_customer.csv')
data['tenure'] = pd.to_numeric(data['tenure'])
# .copy() makes the filtered frame independent of the original, so the column
# assignment below is not a chained assignment on a slice (which triggers
# pandas' SettingWithCopyWarning).
data = data[data['tenure'] > 0].copy()

# Encode Churn as the event indicator: 1 for 'Yes' (event), 0 for anything
# else (censored). Vectorized comparison instead of a per-row lambda.
data['Churn'] = data['Churn'].eq('Yes').astype('int64')
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

T = data['tenure']
E = data['Churn']

# Fit each parametric family on the full data set.
wbf = WeibullFitter().fit(T, E, label='WeibullFitter')
ef = ExponentialFitter().fit(T, E, label='ExponentialFitter')
lnf = LogNormalFitter().fit(T, E, label='LogNormalFitter')
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')

# One survival curve per panel of the 2x2 grid.
wbf.plot_survival_function(ax=axes[0][0])
ef.plot_survival_function(ax=axes[0][1])
lnf.plot_survival_function(ax=axes[1][0])
llf.plot_survival_function(ax=axes[1][1])

plt.suptitle(
    'Implementation of  Paramteric Models to create survival functions on the teleco dataset'
)

# Shared axis captions for the whole figure.
fig.text(0.5, 0.04, 'Timeline', ha='center')
fig.text(0.04, 0.5, 'Probability', va='center', rotation='vertical')
plt.savefig('Images/SurvivalFunctions.jpeg')
plt.show()
Exemple #12
0
# Randomly place every sample in one of four groups. N, T, T_actual and the
# MIN_* limits are expected to be defined before this point (not shown here).
ix = np.random.randint(4, size=N)

# Samples in group k are floored at the k-th limit; a value that was raised
# to its limit is treated as left-censored.
for group_id, lower_limit in enumerate((MIN_0, MIN_1, MIN_2, MIN_3)):
    T = np.where(ix == group_id, np.maximum(T, lower_limit), T)
E = T_actual == T

fig, axes = plt.subplots(2, 2, figsize=(9, 5))
axes = axes.reshape(4)

# One cumulative-density panel per estimator, labelled with its class name.
# Every estimator, KaplanMeierFitter included, is fitted with the same call.
for i, model in enumerate(
    [WeibullFitter(), KaplanMeierFitter(), LogNormalFitter(), LogLogisticFitter()]
):
    model.fit_left_censoring(T, E, label=model.__class__.__name__)
    model.plot_cumulative_density(ax=axes[i])
plt.tight_layout()

# Refit each parametric model and open a fresh two-panel figure for it.
for i, model in enumerate(
    [WeibullFitter(), LogNormalFitter(), LogLogisticFitter()]
):
    model.fit_left_censoring(T, E)
    fig, axes = plt.subplots(2, 1, figsize=(8, 6))
Exemple #13
0
def churn_by_partner(model, axes):
    """Fit ``model`` to the no-partner and partner cohorts in turn and plot both on ``axes``.

    Reads the module-level ``data``, ``T`` and ``E``. The same ``model``
    object is refitted for each cohort, so after the call it holds the fit
    for the 'Have a partner' cohort.
    """
    partner = data['Partner']
    # (legend label, boolean row mask) for each cohort, in plotting order.
    cohorts = (
        ('Do not have a partner', partner == 'No'),
        ('Have a partner', partner == 'Yes'),
    )
    for label, mask in cohorts:
        model.fit(T[mask], E[mask], label=label)
        model.plot(ax=axes)


# Churn by partner, one panel per parametric family:
# log-normal (top-left), Weibull (top-right),
# log-logistic (bottom-left), exponential (bottom-right).
for fitter_cls, (row, col) in (
    (LogNormalFitter, (0, 0)),
    (WeibullFitter, (0, 1)),
    (LogLogisticFitter, (1, 0)),
    (ExponentialFitter, (1, 1)),
):
    churn_by_partner(fitter_cls(), axes[row][col])

# Add the subtitles and labels for the four panels.
plot_details(
    'Partner',
    axes[0, 0],
    axes[0, 1],
    axes[1, 0],
    axes[1, 1],
    fig,
)