def test_aalen_additive_median_predictions_split_data(self):
    """Check that predicted medians exceed the actual lifetime about half the time."""
    # Generate some hazard rates and a survival data set drawn from them.
    sample_size = 2500
    n_covariates = 5
    time_grid = np.linspace(0, 70, 5000)
    hazards, _coefficients, frame = generate_hazard_rates(
        sample_size, n_covariates, time_grid
    )
    lifetimes = generate_random_lifetimes(hazards, time_grid)
    frame['T'] = lifetimes

    # Fit the simulated data with Aalen's additive model.
    fitter = AalenAdditiveFitter()
    fitter.fit(frame, 'T')

    # Predict medians from the columns labelled 0 through 5 (i.e. without 'T').
    covariate_columns = list(range(6))
    predicted_median = fitter.predict_median(frame[covariate_columns])

    # The share of predictions above the observed lifetime must be within
    # 0.05 of one half.
    share_above = (predicted_median.values > lifetimes).mean()
    assert abs(share_above - 0.5) < 0.05
def test_swapping_order_of_columns_in_a_df_is_okay(self, rossi):
    """Check that predict_median gives equal frames for reordered covariate columns."""

    def check_order_invariance(fitter, reference, alternatives):
        # Compare the prediction on the reference ordering against the
        # prediction on each alternative ordering, one pair at a time.
        for columns in alternatives:
            assert_frame_equal(
                fitter.predict_median(rossi[reference]),
                fitter.predict_median(rossi[columns]),
            )

    with_intercept = AalenAdditiveFitter()
    with_intercept.fit(rossi, event_col='arrest', duration_col='week')

    # Three orderings of the same covariates: hand-shuffled, the frame's own
    # order minus the duration/event columns, and the Index.difference result.
    shuffled = ['age', 'race', 'wexp', 'mar', 'paro', 'prio', 'fin']
    as_stored = rossi.columns.drop(['week', 'arrest'])
    via_difference = rossi.columns.difference(['week', 'arrest'])
    check_order_invariance(with_intercept, as_stored, (shuffled, via_difference))

    # Repeat the same checks for a model fitted without an intercept.
    without_intercept = AalenAdditiveFitter(fit_intercept=False)
    without_intercept.fit(rossi, event_col='arrest', duration_col='week')
    check_order_invariance(without_intercept, as_stored, (shuffled, via_difference))
def test_swapping_order_of_columns_in_a_df_is_okay(self, rossi):
    """Check that predict_median does not depend on covariate column order.

    The model is fitted on ``rossi`` twice (with the default settings and
    with ``fit_intercept=False``); for each fit, predictions on the
    covariates in their stored order must equal predictions on a
    hand-shuffled order and on the order produced by ``Index.difference``.
    """
    aaf = AalenAdditiveFitter()
    aaf.fit(rossi, event_col='arrest', duration_col='week')

    misorder = ['age', 'race', 'wexp', 'mar', 'paro', 'prio', 'fin']
    natural_order = rossi.columns.drop(['week', 'arrest'])
    # Use the explicit set operation: in current pandas `Index - list` is
    # element-wise subtraction rather than a set difference, and it fails
    # on string column labels.
    deleted_order = rossi.columns.difference(['week', 'arrest'])

    assert_frame_equal(aaf.predict_median(rossi[natural_order]),
                       aaf.predict_median(rossi[misorder]))
    assert_frame_equal(aaf.predict_median(rossi[natural_order]),
                       aaf.predict_median(rossi[deleted_order]))

    # Same checks for a model fitted without an intercept.
    aaf = AalenAdditiveFitter(fit_intercept=False)
    aaf.fit(rossi, event_col='arrest', duration_col='week')

    assert_frame_equal(aaf.predict_median(rossi[natural_order]),
                       aaf.predict_median(rossi[misorder]))
    assert_frame_equal(aaf.predict_median(rossi[natural_order]),
                       aaf.predict_median(rossi[deleted_order]))
clinical, return_type='dataframe') X['T'] = clinical['OS_OS'] X['C'] = clinical['OS_vital_status'] trainX = X.ix[trainLabels[i], :].reset_index() testX = X.ix[testLabels[i], :].reset_index() #Build model and train aaf = AalenAdditiveFitter(penalizer=1., fit_intercept=True) aaf.fit(trainX.drop(['index'], axis=1), duration_col='T', event_col='C', show_progress=False) #Predict on testing data median = aaf.predict_median(testX.drop(['T', 'C', 'index'], axis=1)) median.index = testX['index'] predictions.append(median.replace([np.inf, -np.inf, np.nan], 0)) # ###Saving Results to Synapse and ask Synapse to evaluate our predictions # To document what we have done we will start by storing this code in Synapse as a file Entity. # In[34]: codeEntity = synapseclient.File('tcga_survival_analysis.py', parentId='syn1720423') codeEntity = syn.store(codeEntity) # We then save the predictions we made to a file and create a file Entity for it. # In[95]:
# Go through each training/testing Monte Carlo sampling and train/predict.
predictions = []

# The design matrix and the duration/event columns do not depend on the
# split, so build them once instead of on every iteration.
X = patsy.dmatrix('age + grade + stage -1', clinical, return_type='dataframe')
X['T'] = clinical['OS_OS']
X['C'] = clinical['OS_vital_status']

for i in range(trainLabels.shape[1]):
    # `DataFrame.ix` was removed in pandas 1.0; `.loc` is the label-based
    # replacement.
    # NOTE(review): assumes trainLabels[i] / testLabels[i] hold index labels
    # of `clinical` (not integer positions) -- confirm against the data.
    trainX = X.loc[trainLabels[i], :].reset_index()
    testX = X.loc[testLabels[i], :].reset_index()

    # Build model and train. The 'index' column produced by reset_index()
    # is dropped so it is not passed to the fitter as a covariate.
    aaf = AalenAdditiveFitter(penalizer=1., fit_intercept=True)
    aaf.fit(trainX.drop(['index'], axis=1), duration_col='T', event_col='C',
            show_progress=False)

    # Predict on testing data, then restore the original row labels.
    median = aaf.predict_median(testX.drop(['T', 'C', 'index'], axis=1))
    median.index = testX['index']
    # Replace infinite and missing medians with 0 before collecting them.
    predictions.append(median.replace([np.inf, -np.inf, np.nan], 0))


# ###Saving Results to Synapse and ask Synapse to evaluate our predictions
# To document what we have done we will start by storing this code in Synapse as a file Entity.

# In[34]:

codeEntity = synapseclient.File('tcga_survival_analysis.py', parentId='syn1720423')
codeEntity = syn.store(codeEntity)


# We then save the predictions we made to a file and create a file Entity for it.