Ejemplo n.º 1
0
    # Generate the raw records and convert them with transform_data().
    raw_data = generate_data()
    df = transform_data(raw_data)

    # Table D.4.

    # mcf1: marked point process (8.14), built from the event MCF lt1.
    # mcf2: cumulative cost process (8.12), estimated from df directly.
    lt1 = mcf(df, robust=True, positive=False)
    mcf1 = mcfcost(df, mcf_compound=lt1, robust=False, positive=False)
    mcf2 = mcfcost(df, robust=True, positive=False)

    # Plot the data (cost plotting switched off).
    plot_data(df, alpha=0.5, marker='.', plot_costs=False)

    # Plot the two cost MCFs side by side on a shared x axis.
    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=False)
    plot_mcf(mcf1, ax=ax1, cost=True, label='Events * Cost')
    plot_mcf(mcf2, ax=ax2, cost=True, label='Cost')

    # Table 8.1.
    # Columns kept for the table and the times at which it is evaluated.
    summary_columns = ['Time', 'E[C]', 'E[C].Std']
    report_times = [0.50, 1.00, 1.50, 2.00, 2.50]

    # Standard error is the square root of the estimated variance.
    mcf1['E[C].Std'] = np.sqrt(mcf1['E[C].Var'])
    mcf2['E[C].Std'] = np.sqrt(mcf2['E[C].Var'])

    # Index by time and pick, for each report time, the nearest available row.
    mcf1 = (mcf1[summary_columns]
            .set_index('Time')
            .reindex(report_times, method='nearest'))
    mcf2 = (mcf2[summary_columns]
            .set_index('Time')
            .reindex(report_times, method='nearest'))
    mcf1.rename(columns={
        'E[C]': 'EST. (8.14)',
        'E[C].Std': 'S.E. (8.15)'
    },
Ejemplo n.º 2
0
    # Split the data by the 'Drug' column and compare the two cohorts.
    covariate = 'Drug'
    cohort1, cohort2 = 'Placebo', 'Thiotepa'

    # group_df: same rows as df, indexed by cohort label.
    # group_lt: one MCF table per cohort, keyed by cohort label.
    group_df = df.set_index(covariate)
    group_lt = df.groupby(covariate).apply(
        lambda sfr: mcf(sfr, robust=True, positive=False))

    # Label-based selection with .loc: the legacy .ix indexer was deprecated
    # in pandas 0.20 and removed in pandas 1.0. For string labels the two
    # select the same rows.
    df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
    lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]

    lt_diff = mcfdiff(lt1, lt2)

    # Plot, no stratification
    # NOTE(review): `lt` is not assigned in the visible code -- presumably the
    # unstratified MCF computed earlier in this script; confirm.
    fig, (ax1, ax2) = plt.subplots(2, 1)
    plot_data(df, ax=ax1)
    plot_mcf(lt, ax=ax2)
    plt.tight_layout()

    # Plot, stratified
    fig, (ax1, ax2) = plt.subplots(2, 1)
    plot_datas([(cohort1, df1), (cohort2, df2)], ax=ax1)
    plot_mcfs([(cohort1, lt1), (cohort2, lt2)], ax=ax2)

    # Comparison plot
    fig, ax1 = plt.subplots(1, 1)
    plot_mcfdiff(lt_diff, ax=ax1, label='%s vs. %s' % (cohort1, cohort2))
    plt.tight_layout()

    # Compute p-values from the two cohort data sets.
    p_value0 = logrank(df1, df2)
    p_value1 = mcfequal(df1, df2)
Ejemplo n.º 3
0
                           sep=';')  #, usecols=[0,1,2])

    # Build one transformed data set per value of the 'Sex' column; missing
    # values left by the transformation are filled with the cohort label.
    cohort1, cohort2 = 'Male', 'Female'
    df1 = transform_population(
        raw_data[raw_data['Sex'] == cohort1]).fillna(cohort1)
    df2 = transform_population(
        raw_data[raw_data['Sex'] == cohort2]).fillna(cohort2)
    df = pd.concat((df1, df2))

    # Per-cohort MCFs and their difference.
    lt1 = mcf(df1, robust=False, positive=False)
    lt2 = mcf(df2, robust=False, positive=False)
    lt_diff = mcfdiff(lt1, lt2)

    # Plot, stratified: first cohort alone, both cohorts together, difference.
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=False, sharex=True)
    plot_mcf(lt1, ax=ax1, interval=True, CI=True, label=cohort1)
    plot_mcfs([(cohort1, lt1), (cohort2, lt2)],
              ax=ax2,
              interval=True,
              CI=False)
    plot_mcfdiff(lt_diff, ax=ax3, label='%s vs. %s' % (cohort1, cohort2))
    ax1.set_ylim([0, 2])
    ax2.set_ylim([0, 2])
    ax3.set_ylim([-1, 1])
    plt.xlim([0, 60])

    # Compute p-values
    p_value1 = mcfequal(df1, df2)
    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print("p-value: %.3f" % p_value1)

    plt.show()
Ejemplo n.º 4
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from data_operations import transform_population, mcf, mcfdiff, mcfequal,\
    plot_data, plot_datas, plot_mcf, plot_mcfs, plot_mcfdiff, plot_mcfdiffs, population_reverse

if __name__ == '__main__':

    # Defrost Control Example
    # Load the semicolon-separated data set, transform it, and estimate
    # its MCF.
    raw_data = pd.read_csv('datas/defrost_interval.csv', sep=';')
    df = transform_population(raw_data)
    lt1 = mcf(df, robust=False, positive=False, interval=True)
    # Single-argument print(...) behaves identically on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(df.head())

    # Plot the data after passing it through population_reverse(); the very
    # low alpha keeps heavily overlapping markers readable.
    fr = population_reverse(df)
    plot_data(fr, alpha=0.005)

    # Plot the MCF on its own figure, zoomed to the 0-0.1 range.
    fig, (ax1) = plt.subplots(1, 1, sharex=True, sharey=True)
    plot_mcf(lt1, ax=ax1, interval=True)
    plt.ylim([0.0, 0.1])
    plt.show()

    # NOTE(review): disabled cost experiment. `mcf_cost` and `plot_cost` are
    # not among the names imported by this script and `lt` is never assigned,
    # so this would need updating before it could be re-enabled.
    #lt_cost = mcf_cost(fr, mcf_compound=lt)
    #print lt_cost.head()
    #plot_cost(mcf_cost)
Ejemplo n.º 5
0
    # Rows with Event == 0 are included in both subsets; the Type column
    # separates session rows (0) from purchase rows (1).
    event_is_zero = raw_data['Event'] == 0
    session_rows = raw_data[(raw_data['Type'] == 0) | event_is_zero]
    purchase_rows = raw_data[(raw_data['Type'] == 1) | event_is_zero]
    df_sessions = transform_data(session_rows)
    df_purchases = transform_data(purchase_rows)

    # Cost MCF for each subset.
    mcf_sessions = mcfcost(df_sessions, robust=True, positive=True)
    mcf_purchases = mcfcost(df_purchases, robust=True, positive=True)

    # 2x2 grid: data on the top row, the matching cost MCF below it.
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(
        nrows=2, ncols=2, sharex=True, sharey=False)
    plot_data(df_sessions, ax=ax1, marker='.', label='Session', alpha=0.2)
    plot_data(df_purchases, ax=ax2, marker='*', label='Purchase', alpha=0.2)
    sessions_axes = plot_mcf(mcf_sessions, ax=ax3, cost=True)
    sessions_axes.set_ylabel('Playtime')
    purchases_axes = plot_mcf(mcf_purchases, ax=ax4, cost=True)
    purchases_axes.set_ylabel('Lifetime Value')

    # Reference curves: mean(dist) * rate / churn_rate * (1 - exp(-churn_rate * T)),
    # evaluated on 100 evenly spaced times in [0, max_date].
    T = np.linspace(0, max_date, 100)
    saturation = 1 - np.exp(-churn_rate * T)
    E_playtime = playtime_dist.mean() * play_rate / churn_rate * saturation
    E_purchases = purchase_dist.mean() * purchase_rate / churn_rate * saturation
    ax3.plot(T, E_playtime,
             linestyle='-', alpha=0.5, color='black', label='True')
    ax4.plot(T,
             E_purchases,
Ejemplo n.º 6
0
import matplotlib.pyplot as plt
from data_operations import transform_population, mcf, mcfdiff, mcfequal,\
    plot_data, plot_datas, plot_mcf, plot_mcfs, plot_mcfdiff, plot_mcfdiffs, population_reverse

if __name__ == '__main__':

    # Compressor Example
    # Load the semicolon-separated data set and transform it.
    raw_data = pd.read_csv('datas/compressor_population.csv', sep=';')
    #raw_data = raw_data.groupby('Time', as_index=False).sum()
    df = transform_population(raw_data)
    # Single-argument print(...) behaves identically on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(population_reverse(df))

    # One reversed, transformed data set per value of the 'Building' column,
    # collected as (building, frame) pairs for plot_datas().
    group_df = raw_data.groupby('Building').apply(
        lambda sdata: population_reverse(transform_population(sdata)))
    dfs = list(group_df.groupby(level=0))

    # Same data, estimated with and without the positive=True option.
    lt1 = mcf(df, robust=False, positive=False)
    lt2 = mcf(df, robust=False, positive=True)

    plot_datas(dfs, alpha=0.1)

    # Show both estimates side by side on shared axes.
    fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=True)
    plot_mcf(lt1, ax=ax1, label='Normal limits')
    plot_mcf(lt2, ax=ax2, label='Normal limits (positive)')
    plt.ylim([-0.005, 0.1])
    plt.show()

    # NOTE(review): disabled cost experiment. `mcf_cost` and `plot_cost` are
    # not among the names imported by this script and neither `fr` nor `lt`
    # is assigned here, so this would need updating before re-enabling.
    #lt_cost = mcf_cost(fr, mcf_compound=lt)
    #print lt_cost.head()
    #plot_cost(mcf_cost)
Ejemplo n.º 7
0
    # Fan Example
    # Load the semicolon-separated data set; the 'Cost' column is renamed to
    # 'Costs' before the data is transformed.
    raw_data = pd.read_csv('datas/fan_population.csv', sep=';')
    raw_data.rename(columns={'Cost': 'Costs'}, inplace=True)
    df = transform_population(raw_data)
    fr = population_reverse(df)
    fr2 = population_reverse_costs(df)

    # Event MCFs, estimated with and without the positive=True option.
    lt1 = mcf(df, robust=False, positive=False)
    lt2 = mcf(df, robust=False, positive=True)

    # Cost MCFs built on top of the corresponding event MCF.
    mcf1 = mcfcost(df, mcf_compound=lt1, robust=False, positive=False)
    mcf2 = mcfcost(df, mcf_compound=lt2, robust=False, positive=True)

    plot_data(fr2, alpha=0.5)

    # 2x2 grid: event MCFs on the top row, cost MCFs on the bottom row.
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2,
                                                 2,
                                                 sharex=True,
                                                 sharey=False)
    plot_mcf(lt1, ax=ax1, label='Normal limits')
    plot_mcf(lt2, ax=ax2, label='Normal limits (positive)')
    plot_mcf(mcf1, ax=ax3, cost=True, label='Normal limits')
    plot_mcf(mcf2, ax=ax4, cost=True, label='Normal limits (positive)')
    # Set the axis limits BEFORE plt.show(): with a blocking backend show()
    # only returns once the window is closed, so limits applied afterwards
    # never reach the displayed figure.
    ax1.set_ylim([-0.01, 0.51])
    ax2.set_ylim([-0.01, 0.51])
    plt.show()
    #lt_cost = mcf_cost(fr, mcf_compound=lt)
    #print lt_cost.head()
    #plot_cost(mcf_cost)