Exemple #1
0
    def test_from_dataframe_wrong_class(surv_data_frame):
        """Check that ``Surv.from_dataframe`` rejects non-DataFrame ``data``.

        A plain ``dict`` and a raw ``numpy.ndarray`` derived from the fixture
        must each trigger a ``TypeError`` whose message names the offending
        type.
        """
        # NOTE: the "exepected" spelling is intentional here -- the pattern has
        # to match the message raised by the code under test.
        rejected_inputs = (
            (surv_data_frame.to_dict(),
             r"exepected pandas.DataFrame, but got <class 'dict'>"),
            (surv_data_frame.values,
             r"exepected pandas.DataFrame, but got <class 'numpy.ndarray'>"),
        )

        for bad_input, message in rejected_inputs:
            with pytest.raises(TypeError, match=message):
                Surv.from_dataframe('event', 'time', bad_input)
Exemple #2
0
def rossi():
    """Load ``rossi.csv`` from the ``data`` directory next to this module.

    Returns
    -------
    DataSet
        ``x`` holds every column of the CSV except ``arrest`` and ``week``;
        ``y`` is the result of ``Surv.from_dataframe`` called with "arrest"
        as the event column and "week" as the time column.
    """
    csv_path = Path(__file__).parent / 'data' / 'rossi.csv'
    frame = pandas.read_csv(csv_path)

    covariates = frame.drop(columns=["arrest", "week"])
    outcome = Surv.from_dataframe("arrest", "week", frame)
    return DataSet(x=covariates, y=outcome)
Exemple #3
0
    def test_from_dataframe_no_such_column(surv_data_frame):
        """Check that an unknown column name raises ``KeyError``.

        ``Surv.from_dataframe`` is called once with a missing event column and
        once with a missing time column; both calls must fail with the same
        "label not in columns" message.
        """
        data = surv_data_frame
        data['event'] = data['event'].astype(bool)

        # Both lookups are expected to fail with the same message, so the
        # pattern is defined once.
        missing_column = r'the label \[unknown\] is not in the \[columns\]'

        # Unknown event column.
        with pytest.raises(KeyError, match=missing_column):
            Surv.from_dataframe('unknown', 'time', data)

        # Unknown time column.
        with pytest.raises(KeyError, match=missing_column):
            Surv.from_dataframe('event', 'unknown', data)
Exemple #4
0
    def test_from_dataframe_int(surv_data_frame):
        """Check conversion when the event column is not already boolean.

        The expected structured array is built by hand (event cast to bool,
        time as float) and compared with the output of
        ``Surv.from_dataframe``.
        """
        frame = surv_data_frame

        # The reference array is allocated with 100 entries, so the fixture is
        # presumably 100 rows long -- confirm against the fixture definition.
        record_dtype = [('event', bool), ('time', float)]
        expected = numpy.empty(shape=100, dtype=record_dtype)
        expected['time'] = frame['time']
        expected['event'] = frame['event'].astype(bool)

        result = Surv.from_dataframe('event', 'time', frame)
        assert_array_equal(result, expected)
Exemple #5
0
    def test_from_dataframe_column_names(surv_data_frame):
        """Check that the output fields are named after the given columns.

        The fixture's columns are renamed to ``death`` / ``time_to_death`` and
        the resulting structured array must use those same names.
        """
        renamed = surv_data_frame.rename(
            columns={'event': 'death', 'time': 'time_to_death'})
        renamed['death'] = renamed['death'].astype(bool)

        field_names = ('death', 'time_to_death')
        record_dtype = list(zip(field_names, (bool, float)))
        expected = numpy.empty(shape=100, dtype=record_dtype)
        for field in field_names:
            expected[field] = renamed[field]

        result = Surv.from_dataframe('death', 'time_to_death', renamed)
        assert_array_equal(result, expected)
Exemple #6
0
    def test_from_dataframe_no_str_columns(surv_data_frame):
        """Check conversion when the column labels are integers.

        The columns are relabelled ``0`` and ``1``; the expected structured
        array uses the string field names ``'0'`` and ``'1'``.
        """
        frame = surv_data_frame
        frame['event'] = frame['event'].astype(bool)

        expected = numpy.empty(shape=100, dtype=[('0', bool), ('1', float)])
        for field, column in (('0', 'event'), ('1', 'time')):
            expected[field] = frame[column]

        int_labelled = frame.rename(columns={'event': 0, 'time': 1})
        result = Surv.from_dataframe(0, 1, int_labelled)
        assert_array_equal(result, expected)
# features = radiomics_features + clinical_features

# Columns selected from both the training and the test inputs.
features = [
    'Mstage',
    'Nstage',
    'SourceDataset',
    'age',
    'original_shape_VoxelVolume',
    'original_firstorder_Maximum',
    'original_firstorder_Mean',
    'original_glcm_ClusterProminence',
    'original_glcm_Idm',
    'original_glcm_Idn',
    'original_glrlm_RunPercentage',
]

# Read data: keep only the selected columns, then normalize train and test
# together through the project's preprocessing helper.
input_train, output_train, input_test = preprocessing.load_owkin_data()
input_train, input_test = preprocessing.normalizing_input(
    input_train[features], input_test[features])

# Structured survival target built from the 'Event' and 'SurvivalTime' columns.
structured_y = Surv.from_dataframe('Event', 'SurvivalTime', output_train)

# Coxnet (plain cross-validation, currently disabled)
# coxnet = CoxnetSurvivalAnalysis()
# print(cross_validate(coxnet, input_train, structured_y, cv=5))

# Randomized search over the Coxnet hyper-parameters.
tuned_params = {
    "l1_ratio": np.linspace(0.01, 0.02, num=100),
    "n_alphas": range(140, 160),
}
grid_search = RandomizedSearchCV(
    CoxnetSurvivalAnalysis(),
    tuned_params,
    n_iter=1000,
    cv=5,
    n_jobs=4,
)
 def setUp(self):
     """Load the CSV at ``ROSSI_FILE`` and store features and outcome.

     ``self.x`` receives every column except ``arrest`` and ``week``;
     ``self.y`` receives the result of ``Surv.from_dataframe`` with "arrest"
     as the event column and "week" as the time column.
     """
     frame = pandas.read_csv(ROSSI_FILE)
     self.x = frame.drop(columns=["arrest", "week"])
     self.y = Surv.from_dataframe("arrest", "week", frame)
Exemple #9
0
 def _fit_params(self, X, y):
     """Build the keyword arguments passed on to a ``fit`` call.

     ``y`` is converted with ``Surv.from_dataframe`` using its ``status``
     column as the event and its ``time`` column as the time; ``X`` is
     passed on as its underlying ``.values`` array.
     """
     survival_target = Surv.from_dataframe('status', 'time', y)
     return dict(X=X.values, y=survival_target)