def test_from_dataframe_wrong_class(surv_data_frame):
    """Check that ``Surv.from_dataframe`` rejects non-DataFrame inputs.

    The fixture frame is converted to a ``dict`` and to its ``.values``
    array; each conversion is passed as the data argument and must raise
    ``TypeError`` with a message matching the given pattern.
    """
    frame = surv_data_frame
    # Each pattern is matched against the raised message, so its text
    # (including spelling) must stay exactly as written.
    rejected_inputs = (
        (frame.to_dict(),
         r"exepected pandas.DataFrame, but got <class 'dict'>"),
        (frame.values,
         r"exepected pandas.DataFrame, but got <class 'numpy.ndarray'>"),
    )
    for not_a_frame, message_pattern in rejected_inputs:
        with pytest.raises(TypeError, match=message_pattern):
            Surv.from_dataframe('event', 'time', not_a_frame)
def rossi():
    """Load ``data/rossi.csv`` located next to this module.

    Returns
    -------
    DataSet
        ``x`` holds every CSV column except ``arrest`` and ``week``;
        ``y`` is built by ``Surv.from_dataframe`` with ``arrest`` as the
        event column and ``week`` as the time column.
    """
    csv_path = Path(__file__).parent / 'data' / 'rossi.csv'
    frame = pandas.read_csv(csv_path)

    outcome_columns = ["arrest", "week"]
    target = Surv.from_dataframe("arrest", "week", frame)
    covariates = frame.drop(outcome_columns, axis=1)
    return DataSet(x=covariates, y=target)
def test_from_dataframe_no_such_column(surv_data_frame):
    """Check that an unknown column name makes ``Surv.from_dataframe`` fail.

    Both positions are exercised: an unknown event column and an unknown
    time column. Each call must raise ``KeyError`` whose message matches
    the pattern below.
    """
    data = surv_data_frame
    data['event'] = data['event'].astype(bool)

    # Same message is expected regardless of which argument names the
    # missing column, so the pattern is defined once.
    missing_label = r'the label \[unknown\] is not in the \[columns\]'

    with pytest.raises(KeyError, match=missing_label):
        Surv.from_dataframe('unknown', 'time', data)

    with pytest.raises(KeyError, match=missing_label):
        Surv.from_dataframe('event', 'unknown', data)
def test_from_dataframe_int(surv_data_frame):
    """Check ``Surv.from_dataframe`` on the fixture frame used as-is.

    The expected result is a 100-element structured array whose
    ``event`` field is the frame's ``event`` column cast to ``bool`` and
    whose ``time`` field is the frame's ``time`` column as ``float``.
    """
    frame = surv_data_frame

    record_dtype = [('event', bool), ('time', float)]
    wanted = numpy.empty(shape=100, dtype=record_dtype)
    wanted['event'] = frame['event'].astype(bool)
    wanted['time'] = frame['time']

    actual = Surv.from_dataframe('event', 'time', frame)
    assert_array_equal(actual, wanted)
def test_from_dataframe_column_names(surv_data_frame):
    """Check that custom column names carry over to the result's fields.

    The fixture columns are renamed to ``death`` / ``time_to_death``;
    the structured array returned by ``Surv.from_dataframe`` is expected
    to use those same names for its fields.
    """
    new_names = {'event': 'death', 'time': 'time_to_death'}
    frame = surv_data_frame.rename(columns=new_names)
    frame['death'] = frame['death'].astype(bool)

    record_dtype = [('death', bool), ('time_to_death', float)]
    wanted = numpy.empty(shape=100, dtype=record_dtype)
    for field in ('death', 'time_to_death'):
        wanted[field] = frame[field]

    actual = Surv.from_dataframe('death', 'time_to_death', frame)
    assert_array_equal(actual, wanted)
def test_from_dataframe_no_str_columns(surv_data_frame):
    """Check ``Surv.from_dataframe`` with non-string column labels.

    The frame's columns are relabelled to the integers ``0`` and ``1``;
    the expected structured array uses the string field names ``'0'``
    and ``'1'``.
    """
    frame = surv_data_frame
    frame['event'] = frame['event'].astype(bool)

    record_dtype = [('0', bool), ('1', float)]
    wanted = numpy.empty(shape=100, dtype=record_dtype)
    wanted['0'] = frame['event']
    wanted['1'] = frame['time']

    int_labelled = frame.rename(columns={'event': 0, 'time': 1})
    actual = Surv.from_dataframe(0, 1, int_labelled)
    assert_array_equal(actual, wanted)
# Columns fed to the model.
# Alternative (disabled): features = radiomics_features + clinical_features
features = [
    'Mstage',
    'Nstage',
    'SourceDataset',
    'age',
    'original_shape_VoxelVolume',
    'original_firstorder_Maximum',
    'original_firstorder_Mean',
    'original_glcm_ClusterProminence',
    'original_glcm_Idm',
    'original_glcm_Idn',
    'original_glrlm_RunPercentage',
]

# Read data, keep only the selected columns, then normalize both splits.
input_train, output_train, input_test = preprocessing.load_owkin_data()
input_train, input_test = input_train[features], input_test[features]
input_train, input_test = preprocessing.normalizing_input(input_train, input_test)

# Structured target built from the 'Event' and 'SurvivalTime' columns.
structured_y = Surv.from_dataframe('Event', 'SurvivalTime', output_train)

# Plain Coxnet baseline (disabled):
# coxnet = CoxnetSurvivalAnalysis()
# print(cross_validate(coxnet, input_train, structured_y, cv=5))

# Randomized search over l1_ratio and n_alphas.
tuned_params = dict(
    l1_ratio=np.linspace(0.01, 0.02, 100),
    n_alphas=range(140, 160),
)
grid_search = RandomizedSearchCV(
    estimator=CoxnetSurvivalAnalysis(),
    param_distributions=tuned_params,
    n_iter=1000,
    cv=5,
    n_jobs=4,
)
def setUp(self):
    """Read ``ROSSI_FILE`` and store features and target on the instance.

    ``self.y`` is built by ``Surv.from_dataframe`` from the ``arrest``
    (event) and ``week`` (time) columns; ``self.x`` is the remaining
    columns of the CSV.
    """
    frame = pandas.read_csv(ROSSI_FILE)
    outcome_columns = ["arrest", "week"]

    self.y = Surv.from_dataframe("arrest", "week", frame)
    self.x = frame.drop(outcome_columns, axis=1)
def _fit_params(self, X, y):
    """Build the keyword arguments used for fitting.

    ``y`` is converted with ``Surv.from_dataframe`` using its ``status``
    column as the event and its ``time`` column as the time; ``X`` is
    reduced to its ``.values``.

    Returns a dict with the keys ``'X'`` and ``'y'``.
    """
    structured_target = Surv.from_dataframe('status', 'time', y)
    return dict(X=X.values, y=structured_target)