plot_data, plot_datas, plot_mcf, plot_mcfs, plot_mcfdiff, plot_mcfdiffs, print_data


if __name__ == '__main__':
    # Bladder Example
    # Alternative input file, kept for reference:
    # raw_data = pd.read_csv('datas/bladder_recurrent.csv', sep=';')
    raw_data = pd.read_csv('datas/T45.csv', sep=';')

    # Rename the CSV columns. Note that the original 'Event' column becomes
    # 'Number' and the original 'Censored' column becomes the new 'Event'.
    raw_data.rename(columns={'Patient number': 'Sample',
                             'Event': 'Number',
                             'Censored': 'Event',
                             'Treatment group': 'Drug'},
                    inplace=True)

    # Replace the numeric treatment codes with readable labels.
    raw_data['Drug'] = raw_data['Drug'].map({1: 'Placebo',
                                             2: 'Pyridoxine',
                                             3: 'Thiotepa'})

    # 'Event' now holds the former 'Censored' flag; invert it.
    # NOTE(review): presumably 1 should mark an observed event and 0 a
    # censoring row -- confirm against transform_data.
    raw_data['Event'] = 1 - raw_data['Event']

    tb = print_data(raw_data)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(tb)

    df = transform_data(raw_data)
    lt = mcf(df, robust=True, positive=False)

    # Per-treatment analysis: one MCF table per value of the covariate.
    covariate = 'Drug'
    cohort1, cohort2 = 'Placebo', 'Thiotepa'
    group_df = df.set_index(covariate)
    group_lt = df.groupby(covariate).apply(
        lambda sfr: mcf(sfr, robust=True, positive=False))

    # Label-based selection with .loc; the former .ix indexer is deprecated
    # and no longer exists in current pandas releases.
    df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
    lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]
    lt_diff = mcfdiff(lt1, lt2)

    # Plot, no stratification
    fig, (ax1, ax2) = plt.subplots(2, 1)
rows.append((i, t_ij, 1, cost)) t_ij += expon.rvs(scale=1. / rate) rows.append((i, t_i, 0, np.nan)) return pd.DataFrame(rows, columns=['Sample', 'Time', 'Event', 'Cost']) if __name__ == '__main__': # Cook (2008, pg. 299) Example 8.1: Field Repair data # This dataset (see Appendix D) gives simulated data on unscheduled repairs # for a fleet of m = 134 large utility vehicles operated by a city. The data were # collected over a three-year period on new vehicles which were purchased and # placed in service over the first two years of the study. Time is measured in # years from the start of the study, and costs are in hundreds of dollars. raw_data = generate_data() df = transform_data(raw_data) # Table D.4. # Marked point process (8.14) as mcf1 and cumulative cost process (8.12) as mcf2 lt1 = mcf(df, robust=True, positive=False) mcf1 = mcfcost(df, mcf_compound=lt1, robust=False, positive=False) mcf2 = mcfcost(df, robust=True, positive=False) # Plot data plot_data(df, alpha=0.5, marker='.', plot_costs=False) # Plot MCFs fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=False) plot_mcf(mcf1, ax=ax1, cost=True, label='Events * Cost') plot_mcf(mcf2, ax=ax2, cost=True, label='Cost')
# Rename the CSV columns. Note that the original 'Event' column becomes
# 'Number' and the original 'Censored' column becomes the new 'Event'.
raw_data.rename(columns={
    'Patient number': 'Sample',
    'Event': 'Number',
    'Censored': 'Event',
    'Treatment group': 'Drug'
}, inplace=True)

# Replace the numeric treatment codes with readable labels.
raw_data['Drug'] = raw_data['Drug'].map({
    1: 'Placebo',
    2: 'Pyridoxine',
    3: 'Thiotepa'
})

# 'Event' now holds the former 'Censored' flag; invert it.
# NOTE(review): presumably 1 should mark an observed event and 0 a censoring
# row -- confirm against transform_data.
raw_data['Event'] = 1 - raw_data['Event']

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(raw_data.head())

df = transform_data(raw_data)
lt = mcf(df, robust=True, positive=False)

# Per-treatment analysis: one MCF table per value of the covariate.
covariate = 'Drug'
cohort1, cohort2 = 'Placebo', 'Thiotepa'
group_df = df.set_index(covariate)
group_lt = df.groupby(covariate).apply(
    lambda sfr: mcf(sfr, robust=True, positive=False))

# Label-based selection with .loc; the former .ix indexer is deprecated and
# no longer exists in current pandas releases.
df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]
lt_diff = mcfdiff(lt1, lt2)

# Plot, no stratification
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from data_operations import (transform_data, mcf, mcfdiff, mcfequal,
                             plot_data, plot_datas, plot_mcf, plot_mcfs,
                             plot_mcfdiff, plot_mcfdiffs)


if __name__ == '__main__':
    # Transmission Example
    data = pd.read_csv('datas/transmission_recurrent.csv', sep=';')
    df = transform_data(data, format='recurrent')

    # MCF over the whole data set; show the first rows of the main columns.
    lt = mcf(df, robust=True, positive=False)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(lt[['Time', 'Y', 'N', 'dE[N]', 'E[N]',
              'E[N].lcl', 'E[N].ucl']].head())

    # Per-transmission analysis: one MCF table per value of the covariate.
    covariate = 'Transmission'
    cohort1, cohort2 = 'Automatic', 'Manual'
    group_df = df.set_index(covariate)
    # The original code carried a disabled extra option here: interval=True
    group_lt = df.groupby(covariate).apply(
        lambda sfr: mcf(sfr, robust=True, positive=True))

    # Label-based selection with .loc; the former .ix indexer is deprecated
    # and no longer exists in current pandas releases.
    df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
    lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]
    lt_diff = mcfdiff(lt1, lt2)

    # Plot, no stratification
    fig, (ax1, ax2) = plt.subplots(2, 1)
    plot_data(df, ax=ax1)
T, columns=['Sample', 'Time', 'Event', 'Type', 'Cost']) if __name__ == '__main__': max_date = 180 play_rate, purchase_rate, churn_rate = 0.1, 0.004, 0.02 playtime_dist = stats.expon(scale=0.25) purchase_dist = stats.rv_discrete(values=((1, 4, 10, 20), (0.80, 0.15, 0.04, 0.01))) process = MarkovRecurrentTerminal(play_rate, purchase_rate, churn_rate, playtime_dist, purchase_dist) raw_data = process.Sample(300, max_date) df_sessions = transform_data(raw_data[(raw_data['Type'] == 0) | (raw_data['Event'] == 0)]) df_purchases = transform_data(raw_data[(raw_data['Type'] == 1) | (raw_data['Event'] == 0)]) mcf_sessions = mcfcost(df_sessions, robust=True, positive=True) mcf_purchases = mcfcost(df_purchases, robust=True, positive=True) fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True, sharey=False) plot_data(df_sessions, ax=ax1, marker='.', label='Session', alpha=0.2) plot_data(df_purchases, ax=ax2, marker='*', label='Purchase', alpha=0.2) plot_mcf(mcf_sessions, ax=ax3, cost=True).set_ylabel('Playtime') plot_mcf(mcf_purchases, ax=ax4, cost=True).set_ylabel('Lifetime Value')