예제 #1
0
    # MCF estimated on the full data set, without splitting by covariate.
    lt = mcf(df, robust=True, positive=False)

    covariate = 'Drug'
    cohort1, cohort2 = 'Placebo', 'Thiotepa'

    # Index the raw data by the covariate and estimate one MCF per covariate
    # level.
    group_df = df.set_index(covariate)
    group_lt = df.groupby(covariate).apply(lambda sfr: mcf(sfr, robust=True, positive=False))

    # Label-based selection of the two cohorts. `.loc` is used instead of the
    # `.ix` indexer, which was deprecated in pandas 0.20 and removed in 1.0;
    # for these string labels the two select the same rows.
    df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
    lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]

    # Difference between the two cohort MCFs.
    lt_diff = mcfdiff(lt1, lt2)

    # Plot, no stratification
    fig, (ax1, ax2) = plt.subplots(2, 1)
    plot_data(df, ax=ax1)
    plot_mcf(lt, ax=ax2)
    plt.tight_layout()

    # Plot, stratified
    fig, (ax1, ax2) = plt.subplots(2, 1)
    plot_datas([(cohort1, df1), (cohort2, df2)], ax=ax1)
    plot_mcfs([(cohort1, lt1), (cohort2, lt2)], ax=ax2)
    # Same layout pass as the other two figures in this example.
    plt.tight_layout()

    # Comparison plot
    fig, ax1 = plt.subplots(1, 1)
    plot_mcfdiff(lt_diff, ax=ax1, label='%s vs. %s' % (cohort1, cohort2))
    plt.tight_layout()

    # Compute p-values
    p_value0 = logrank(df1, df2)
예제 #2
0
    #   for a fleet of m = 134 large utility vehicles operated by a city. The data were
    #   collected over a three-year period on new vehicles which were purchased and
    #   placed in service over the first two years of the study. Time is measured in
    #   years from the start of the study, and costs are in hundreds of dollars.
    raw_data = generate_data()
    df = transform_data(raw_data)

    # Table D.4.

    # lt1 is the event MCF that is passed to mcfcost as `mcf_compound`.
    lt1 = mcf(df, robust=True, positive=False)
    # mcf1: marked point process (8.14); mcf2: cumulative cost process (8.12).
    mcf1 = mcfcost(df, mcf_compound=lt1, robust=False, positive=False)
    mcf2 = mcfcost(df, robust=True, positive=False)

    # Plot data
    plot_data(df, alpha=0.5, marker='.', plot_costs=False)

    # Plot MCFs, one panel per cost estimate, sharing only the x axis.
    fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=False)
    plot_mcf(mcf1, ax=ax1, cost=True, label='Events * Cost')
    plot_mcf(mcf2, ax=ax2, cost=True, label='Cost')

    # Table 8.1.
    # Columns kept in the table and the time points it is evaluated at.
    table_columns = ['Time', 'E[C]', 'E[C].Std']
    table_times = [0.50, 1.00, 1.50, 2.00, 2.50]

    # Standard deviation is the square root of the variance column; each table
    # row is the estimate whose 'Time' is nearest to the requested time point.
    mcf1['E[C].Std'] = np.sqrt(mcf1['E[C].Var'])
    mcf1 = mcf1[table_columns].set_index('Time').reindex(table_times,
                                                         method='nearest')

    mcf2['E[C].Std'] = np.sqrt(mcf2['E[C].Var'])
    mcf2 = mcf2[table_columns].set_index('Time').reindex(table_times,
                                                         method='nearest')
예제 #3
0
                                      playtime_dist, purchase_dist)
    # Draw a sample from the process defined above.
    raw_data = process.Sample(300, max_date)

    # Rows with Event == 0 are kept in both subsets (presumably the
    # censoring/end-of-observation rows -- confirm against transform_data).
    # Type == 0 rows feed the session data, Type == 1 rows the purchase data.
    keep_in_both = raw_data['Event'] == 0
    session_rows = (raw_data['Type'] == 0) | keep_in_both
    df_sessions = transform_data(raw_data[session_rows])
    purchase_rows = (raw_data['Type'] == 1) | keep_in_both
    df_purchases = transform_data(raw_data[purchase_rows])

    # Cost MCF for each stream.
    mcf_sessions = mcfcost(df_sessions, robust=True, positive=True)
    mcf_purchases = mcfcost(df_purchases, robust=True, positive=True)

    # 2x2 grid: raw data on the top row, cost MCFs on the bottom row.
    fig, axes = plt.subplots(2, 2, sharex=True, sharey=False)
    (ax1, ax2), (ax3, ax4) = axes
    plot_data(df_sessions, ax=ax1, marker='.', label='Session', alpha=0.2)
    plot_data(df_purchases, ax=ax2, marker='*', label='Purchase', alpha=0.2)
    playtime_axis = plot_mcf(mcf_sessions, ax=ax3, cost=True)
    playtime_axis.set_ylabel('Playtime')
    value_axis = plot_mcf(mcf_purchases, ax=ax4, cost=True)
    value_axis.set_ylabel('Lifetime Value')

    # Reference curves on a 100-point time grid; both share the factor
    # 1 - exp(-churn_rate * T), so it is computed once.
    T = np.linspace(0, max_date, 100)
    saturation = 1 - np.exp(-churn_rate * T)
    E_playtime = playtime_dist.mean() * play_rate / churn_rate * saturation
    E_purchases = purchase_dist.mean() * purchase_rate / churn_rate * saturation
    ax3.plot(T, E_playtime, linestyle='-', alpha=0.5, color='black', label='True')
예제 #4
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from data_operations import transform_population, mcf, mcfdiff, mcfequal,\
    plot_data, plot_datas, plot_mcf, plot_mcfs, plot_mcfdiff, plot_mcfdiffs, population_reverse

if __name__ == '__main__':

    # Defrost Control Example
    # Load the semicolon-separated data set and convert it with
    # transform_population before estimating the MCF.
    raw_data = pd.read_csv('datas/defrost_interval.csv', sep=';')  # , usecols=[0,1,2])
    df = transform_population(raw_data)
    lt1 = mcf(df, robust=False, positive=False, interval=True)
    # print() call form: valid on Python 3 and prints the same on Python 2
    # (the original `print df.head()` statement is a SyntaxError on Python 3).
    print(df.head())

    # Plot the output of population_reverse; the very low alpha keeps
    # overlapping markers from saturating.
    fr = population_reverse(df)
    plot_data(fr, alpha=0.005)

    # Single-axes figure for the MCF, with the y axis limited to [0, 0.1].
    fig, ax1 = plt.subplots(1, 1, sharex=True, sharey=True)
    plot_mcf(lt1, ax=ax1, interval=True)
    plt.ylim([0.0, 0.1])
    plt.show()

    # Disabled cost analysis, kept for reference:
    #lt_cost = mcf_cost(fr, mcf_compound=lt)
    #print(lt_cost.head())
    #plot_cost(mcf_cost)
예제 #5
0
    # Fan Example
    raw_data = pd.read_csv('datas/fan_population.csv',
                           sep=';')  #, usecols=[0,1,2])
    # The file's column is named 'Cost'; it is renamed to 'Costs' before the
    # transform (presumably the name transform_population expects -- confirm).
    raw_data.rename(columns={'Cost': 'Costs'}, inplace=True)
    df = transform_population(raw_data)
    fr = population_reverse(df)
    fr2 = population_reverse_costs(df)

    # Event MCFs, without and with the `positive` option.
    lt1 = mcf(df, robust=False, positive=False)
    lt2 = mcf(df, robust=False, positive=True)

    # Cost MCFs built on the matching event MCF.
    mcf1 = mcfcost(df, mcf_compound=lt1, robust=False, positive=False)
    mcf2 = mcfcost(df, mcf_compound=lt2, robust=False, positive=True)

    plot_data(fr2, alpha=0.5)

    # 2x2 grid: event MCFs on the top row, cost MCFs on the bottom row.
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2,
                                                 2,
                                                 sharex=True,
                                                 sharey=False)
    plot_mcf(lt1, ax=ax1, label='Normal limits')
    plot_mcf(lt2, ax=ax2, label='Normal limits (positive)')
    plot_mcf(mcf1, ax=ax3, cost=True, label='Normal limits')
    plot_mcf(mcf2, ax=ax4, cost=True, label='Normal limits (positive)')

    # Set the y-limits of the two event-MCF panels BEFORE showing the figure:
    # plt.show() blocks until the window is closed in non-interactive mode,
    # so limits applied afterwards would never be visible.
    ax1.set_ylim([-0.01, 0.51])
    ax2.set_ylim([-0.01, 0.51])
    plt.show()
    #lt_cost = mcf_cost(fr, mcf_compound=lt)
    #print lt_cost.head()
    #plot_cost(mcf_cost)