# MCF computed on the whole frame, without splitting by covariate.
lt = mcf(df, robust=True, positive=False)

# Split the data by the 'Drug' column and compute one MCF per group.
covariate = 'Drug'
cohort1, cohort2 = 'Placebo', 'Thiotepa'
group_df = df.set_index(covariate)
group_lt = df.groupby(covariate).apply(lambda sfr: mcf(sfr, robust=True, positive=False))

# Label-based selection of each cohort. `.loc` replaces the `.ix` indexer,
# which was deprecated in pandas 0.20 and removed in pandas 1.0; for these
# string labels both select the same rows.
df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]
lt_diff = mcfdiff(lt1, lt2)

# Plot, no stratification
fig, (ax1, ax2) = plt.subplots(2, 1)
plot_data(df, ax=ax1)
plot_mcf(lt, ax=ax2)
plt.tight_layout()

# Plot, stratified
fig, (ax1, ax2) = plt.subplots(2, 1)
plot_datas([(cohort1, df1), (cohort2, df2)], ax=ax1)
plot_mcfs([(cohort1, lt1), (cohort2, lt2)], ax=ax2)

# Comparison plot
fig, ax1 = plt.subplots(1, 1)
plot_mcfdiff(lt_diff, ax=ax1, label='%s vs. %s' % (cohort1, cohort2))
plt.tight_layout()

# Compute p-values
p_value0 = logrank(df1, df2)
# for a fleet of m = 134 large utility vehicles operated by a city. The data were
# collected over a three-year period on new vehicles which were purchased and
# placed in service over the first two years of the study. Time is measured in
# years from the start of the study, and costs are in hundreds of dollars.
raw_data = generate_data()
df = transform_data(raw_data)

# Table D.4.
# mcf1: marked point process (8.14), built from the event MCF lt1.
# mcf2: cumulative cost process (8.12), computed directly from the data.
lt1 = mcf(df, robust=True, positive=False)
mcf1 = mcfcost(df, mcf_compound=lt1, robust=False, positive=False)
mcf2 = mcfcost(df, robust=True, positive=False)

# Plot data
plot_data(df, alpha=0.5, marker='.', plot_costs=False)

# Plot MCFs, one panel per cost estimate (shared x axis only).
fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=False)
for axis, frame, title in ((ax1, mcf1, 'Events * Cost'), (ax2, mcf2, 'Cost')):
    plot_mcf(frame, ax=axis, cost=True, label=title)

# Table 8.1.
# Add a standard-deviation column (square root of the variance column), then
# keep only the table columns and pick the rows nearest to the table's times.
table_columns = ['Time', 'E[C]', 'E[C].Std']
table_times = [0.50, 1.00, 1.50, 2.00, 2.50]
mcf1['E[C].Std'] = np.sqrt(mcf1['E[C].Var'])
mcf2['E[C].Std'] = np.sqrt(mcf2['E[C].Var'])
mcf1 = mcf1[table_columns].set_index('Time').reindex(table_times, method='nearest')
mcf2 = mcf2[table_columns].set_index('Time').reindex(table_times, method='nearest')
playtime_dist, purchase_dist)
# NOTE(review): the line above closes a call that begins before the visible
# part of this file; it is reproduced unchanged.

# Draw a sample from `process`. NOTE(review): the arguments are presumably the
# sample size and the observation horizon -- confirm against process.Sample.
raw_data = process.Sample(300, max_date)

# Build one data set per event type. Each keeps the rows of its own 'Type'
# (0 or 1) plus every row with Event == 0.
# NOTE(review): Type 0 / Type 1 appear to mean session / purchase (judging by
# the variable names) and Event == 0 presumably marks censoring rows -- confirm.
df_sessions = transform_data(raw_data[(raw_data['Type'] == 0) | (raw_data['Event'] == 0)])
df_purchases = transform_data(raw_data[(raw_data['Type'] == 1) | (raw_data['Event'] == 0)])

# Cost MCF for each data set.
mcf_sessions = mcfcost(df_sessions, robust=True, positive=True)
mcf_purchases = mcfcost(df_purchases, robust=True, positive=True)

# 2x2 grid: raw data on the top row, cost MCFs on the bottom row.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True, sharey=False)
plot_data(df_sessions, ax=ax1, marker='.', label='Session', alpha=0.2)
plot_data(df_purchases, ax=ax2, marker='*', label='Purchase', alpha=0.2)
plot_mcf(mcf_sessions, ax=ax3, cost=True).set_ylabel('Playtime')
plot_mcf(mcf_purchases, ax=ax4, cost=True).set_ylabel('Lifetime Value')

# Reference curves evaluated on 100 evenly spaced times in [0, max_date]:
#   mean * rate / churn_rate * (1 - exp(-churn_rate * T))
# NOTE(review): presumably the theoretical expected cumulative value under the
# simulated process -- confirm against the definition of `process`.
T = np.linspace(0, max_date, 100)
E_playtime = playtime_dist.mean() * play_rate / churn_rate * (
    1 - np.exp(-churn_rate * T))
E_purchases = purchase_dist.mean() * purchase_rate / churn_rate * (
    1 - np.exp(-churn_rate * T))
# Overlay the playtime reference curve on the sessions MCF panel.
# (E_purchases is not used within the visible part of the file.)
ax3.plot(T, E_playtime, linestyle='-', alpha=0.5, color='black', label='True')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from data_operations import transform_population, mcf, mcfdiff, mcfequal,\
    plot_data, plot_datas, plot_mcf, plot_mcfs, plot_mcfdiff, plot_mcfdiffs, population_reverse

if __name__ == '__main__':
    # Defrost Control Example
    # Load the semicolon-separated data file and convert it with the project helper.
    raw_data = pd.read_csv('datas/defrost_interval.csv', sep=';')#, usecols=[0,1,2])
    df = transform_population(raw_data)
    lt1 = mcf(df, robust=False, positive=False, interval=True)

    # Parenthesised call form: prints the same on Python 2 and is valid on
    # Python 3, where the bare `print x` statement is a SyntaxError.
    print(df.head())

    # Plot the data returned by population_reverse; the very low alpha keeps
    # overlapping markers readable.
    fr = population_reverse(df)
    plot_data(fr, alpha=0.005)

    # Plot the MCF on a single axis, clipped to the range [0.0, 0.1] on y.
    fig, (ax1) = plt.subplots(1, 1, sharex=True, sharey=True)
    plot_mcf(lt1, ax=ax1, interval=True)
    plt.ylim([0.0, 0.1])

    plt.show()

    #lt_cost = mcf_cost(fr, mcf_compound=lt)
    #print lt_cost.head()
    #plot_cost(mcf_cost)
# Fan Example
# Load the semicolon-separated data file; the 'Cost' column is renamed to
# 'Costs' before the frame is handed to the project helpers.
raw_data = pd.read_csv('datas/fan_population.csv', sep=';') #, usecols=[0,1,2])
raw_data.rename(columns={'Cost': 'Costs'}, inplace=True)
df = transform_population(raw_data)
fr = population_reverse(df)
fr2 = population_reverse_costs(df)

# Event MCFs with and without the `positive` option, and the cost MCFs
# derived from each of them.
lt1 = mcf(df, robust=False, positive=False)
lt2 = mcf(df, robust=False, positive=True)
mcf1 = mcfcost(df, mcf_compound=lt1, robust=False, positive=False)
mcf2 = mcfcost(df, mcf_compound=lt2, robust=False, positive=True)

plot_data(fr2, alpha=0.5)

# 2x2 grid: event MCFs on the top row, cost MCFs on the bottom row.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True, sharey=False)
plot_mcf(lt1, ax=ax1, label='Normal limits')
plot_mcf(lt2, ax=ax2, label='Normal limits (positive)')
plot_mcf(mcf1, ax=ax3, cost=True, label='Normal limits')
plot_mcf(mcf2, ax=ax4, cost=True, label='Normal limits (positive)')

# Give both event-MCF panels the same y range. This must happen before
# plt.show(): in a normal (non-interactive) script run show() blocks until the
# windows are closed, so limits set afterwards never reach the displayed figure.
ax1.set_ylim([-0.01, 0.51])
ax2.set_ylim([-0.01, 0.51])

plt.show()

#lt_cost = mcf_cost(fr, mcf_compound=lt)
#print lt_cost.head()
#plot_cost(mcf_cost)