def setUp(self):
    """Prepare the feature matrix, column names and outcome for the tests.

    The features returned by ``load_whas500()`` are passed through
    ``column.standardize`` (with ``with_std=False``) and then through
    ``column.categorical_to_numeric``. The resulting values are stored in
    ``self.x``, the column names in ``self.columns`` and the second return
    value of the loader in ``self.y``.
    """
    features, self.y = load_whas500()

    centered = column.standardize(features, with_std=False)
    numeric = column.categorical_to_numeric(centered)

    self.x = numeric.values
    self.columns = numeric.columns.tolist()
def test_standardize_numpy_array():
    """Check ``column.standardize`` on the raw values of ``MIXED_DATA_FRAME``.

    The leading ``NUMERIC_DATA_FRAME.shape[1]`` columns of the result are
    compared against fixed reference values; the remaining columns must keep
    the null pattern of ``NON_NUMERIC_DATA_FRAME`` and match it on the
    selected rows.
    """
    result = column.standardize(MIXED_DATA_FRAME.values)

    # Every reference row repeats a single value across five columns.
    row_values = numpy.array([
        -1.486301, -1.156012, -0.825723, -0.495434, -0.165145,
        0.165145, 0.495434, 0.825723, 1.156012, 1.486301,
    ])
    expected = numpy.tile(row_values[:, numpy.newaxis], (1, 5))

    n_numeric = NUMERIC_DATA_FRAME.shape[1]
    numeric_part = result[:, :n_numeric]
    other_part = result[:, n_numeric:]

    assert isinstance(result, numpy.ndarray)
    assert_array_almost_equal(expected, numeric_part)

    assert_array_equal(
        pandas.isnull(NON_NUMERIC_DATA_FRAME),
        pandas.isnull(other_part))

    # Rows 6 and 7 are left out of the value comparison
    # (presumably they hold missing values -- confirm against the fixture).
    rows_to_compare = [0, 1, 2, 3, 4, 5, 8, 9]
    assert_array_equal(
        NON_NUMERIC_DATA_FRAME.iloc[rows_to_compare, :].values,
        other_part[rows_to_compare, :])
def test_compare_clinical_kernel(self):
    """Compare FastSurvivalSVM on KernelPCA features with FastKernelSurvivalSVM.

    Both models use the pairwise kernel of a fitted ``ClinicalKernelTransform``
    (the first through ``KernelPCA``, the second directly). The concordance
    index must agree almost exactly and the remaining entries returned by
    ``concordance_index_censored`` must be identical.
    """
    x_full, y = load_whas500()

    trans = ClinicalKernelTransform()
    trans.fit(x_full)
    clinical_kernel = trans.pairwise_kernel

    x = encode_categorical(standardize(x_full))

    # Settings shared by the linear and the kernel model.
    solver_args = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

    kpca = KernelPCA(kernel=clinical_kernel)
    xt = kpca.fit_transform(x)

    nrsvm = FastSurvivalSVM(**solver_args)
    nrsvm.fit(xt, y)

    rsvm = FastKernelSurvivalSVM(kernel=clinical_kernel, **solver_args)
    rsvm.fit(x, y)

    pred_nrsvm = nrsvm.predict(kpca.transform(x))
    pred_rsvm = rsvm.predict(x)
    self.assertEqual(len(pred_nrsvm), len(pred_rsvm))

    cindex_linear = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
    cindex_kernel = concordance_index_censored(y['fstat'], y['lenfol'], pred_rsvm)

    self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
    self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
def _make_whas500(with_mean=True, with_std=True, to_numeric=False):
    """Build a ``DataSetWithNames`` from the data returned by ``load_whas500()``.

    Parameters
    ----------
    with_mean : bool
        If true, the features are passed through ``standardize``.
    with_std : bool
        Forwarded to ``standardize``; has no effect when ``with_mean`` is false.
    to_numeric : bool
        If true, the features are passed through ``categorical_to_numeric``.

    Returns
    -------
    DataSetWithNames
        Holds the feature values, the outcome, the column names prefixed
        with ``'(Intercept)'``, and the feature data frame itself.
    """
    features, outcome = load_whas500()

    if with_mean:
        features = standardize(features, with_std=with_std)
    if to_numeric:
        features = categorical_to_numeric(features)

    return DataSetWithNames(
        x=features.values,
        y=outcome,
        names=['(Intercept)'] + features.columns.tolist(),
        x_data_frame=features,
    )
def test_standardize_not_inplace(numeric_data):
    """``column.standardize`` must leave the array it is given unchanged."""
    frame, expected = numeric_data
    original = frame.values
    # Snapshot taken before the call so that any in-place change is detected.
    snapshot = original.copy()

    standardized = column.standardize(original)

    assert_array_almost_equal(expected, standardized)
    assert_array_almost_equal(snapshot, original)
def test_standardize_mixed(numeric_data, non_numeric_data_frame):
    """Standardize a frame made of numeric columns followed by non-numeric ones.

    The numeric columns must match the expected values from the fixture and
    the non-numeric columns must come back equal to the input frame.
    """
    numeric_data_frame, expected = numeric_data
    n_numeric = numeric_data_frame.shape[1]

    combined = pandas.concat((numeric_data_frame, non_numeric_data_frame), axis=1)
    result = column.standardize(combined)

    assert isinstance(result, pandas.DataFrame)

    numeric_part = result.iloc[:, :n_numeric]
    other_part = result.iloc[:, n_numeric:]
    assert_array_almost_equal(expected, numeric_part.values)
    tm.assert_frame_equal(non_numeric_data_frame, other_part)
def test_standardize_non_numeric_numpy_array(non_numeric_data_frame):
    """Standardizing the raw values of a non-numeric frame returns an ndarray.

    The null pattern must match the input frame, and the selected rows must
    be equal to the input values.
    """
    result = column.standardize(non_numeric_data_frame.values)

    assert isinstance(result, numpy.ndarray)
    assert_array_equal(pandas.isnull(non_numeric_data_frame),
                       pandas.isnull(result))

    # Rows 6 and 7 are left out of the value comparison
    # (presumably they hold missing values -- confirm against the fixture).
    rows_to_compare = [0, 1, 2, 3, 4, 5, 8, 9]
    expected_rows = non_numeric_data_frame.iloc[rows_to_compare, :].values
    assert_array_equal(expected_rows, result[rows_to_compare, :])
def test_standardize_mixed_numpy_array(numeric_data, non_numeric_data_frame):
    """Standardize the raw values of a mixed numeric / non-numeric frame.

    The result must have the null pattern of the input, its leading columns
    must be almost equal to the numeric input frame itself, and the remaining
    columns must match the non-numeric input on the selected rows.
    """
    numeric_data_frame, _ = numeric_data
    n_numeric = numeric_data_frame.shape[1]

    combined = pandas.concat((numeric_data_frame, non_numeric_data_frame), axis=1)
    result = column.standardize(combined.values)

    assert_array_equal(pandas.isnull(combined), pandas.isnull(result))

    # The numeric columns are compared against the input frame,
    # not against the expected values from the fixture.
    assert_array_almost_equal(numeric_data_frame, result[:, :n_numeric])

    # Rows 6 and 7 are left out of the value comparison
    # (presumably they hold missing values -- confirm against the fixture).
    rows_to_compare = [0, 1, 2, 3, 4, 5, 8, 9]
    other_part = result[:, n_numeric:]
    assert_array_equal(non_numeric_data_frame.iloc[rows_to_compare, :].values,
                       other_part[rows_to_compare, :])
def test_predict(rossi):
    """Fit ``CoxPHSurvivalAnalysis`` and compare predictions for selected rows.

    The model is fitted on ``rossi.x`` after ``standardize(..., with_std=False)``
    and its predictions for fifteen rows are compared with reference values.
    """
    xc = standardize(rossi.x, with_std=False)

    model = CoxPHSurvivalAnalysis()
    model.fit(xc.values, rossi.y)

    # Row positions for which reference predictions are available.
    rows = numpy.array([15, 77, 79, 90, 113, 122, 134, 172, 213, 219,
                        257, 313, 364, 395, 409])
    expected = numpy.array([
        -0.136002823953217, -1.13104636905577, 0.741965816026403,
        -0.98072115186145, -0.600098931134794, -0.997407014712788,
        -0.0993800739865776, -0.266761246895696, -0.665145743277517,
        -0.418747210463951, -0.0770761787926419, 0.411385264707043,
        -0.0770761787926419, 0.563114305747799, -1.07096133044073,
    ])

    predicted = model.predict(xc.iloc[rows, :].values)
    assert_array_almost_equal(expected, predicted)
def test_standardize_numeric(self):
    """``column.standardize`` on ``NUMERIC_DATA_FRAME`` returns a DataFrame
    whose values match the fixed reference values."""
    result = column.standardize(NUMERIC_DATA_FRAME)

    # Every reference row repeats a single value across five columns.
    row_values = numpy.array([
        -1.486301, -1.156012, -0.825723, -0.495434, -0.165145,
        0.165145, 0.495434, 0.825723, 1.156012, 1.486301,
    ])
    expected = numpy.tile(row_values[:, numpy.newaxis], (1, 5))

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only reports "False is not true".
    self.assertIsInstance(result, pandas.DataFrame)
    assert_array_almost_equal(expected, result)
def test_standardize_mixed(self):
    """``column.standardize`` on ``MIXED_DATA_FRAME`` returns a DataFrame.

    The leading ``NUMERIC_DATA_FRAME.shape[1]`` columns must match the fixed
    reference values; the remaining columns must be equal to
    ``NON_NUMERIC_DATA_FRAME``.
    """
    result = column.standardize(MIXED_DATA_FRAME)

    # Every reference row repeats a single value across five columns.
    row_values = numpy.array([
        -1.486301, -1.156012, -0.825723, -0.495434, -0.165145,
        0.165145, 0.495434, 0.825723, 1.156012, 1.486301,
    ])
    expected = numpy.tile(row_values[:, numpy.newaxis], (1, 5))

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only reports "False is not true".
    self.assertIsInstance(result, pandas.DataFrame)

    n_numeric = NUMERIC_DATA_FRAME.shape[1]
    assert_array_almost_equal(expected, result.iloc[:, :n_numeric].values)
    tm.assert_frame_equal(NON_NUMERIC_DATA_FRAME, result.iloc[:, n_numeric:])
def test_fit_and_predict_clinical_kernel(self):
    """Fit ``FastKernelSurvivalSVM`` with the clinical kernel and check its score.

    The kernel is the pairwise kernel of a ``ClinicalKernelTransform`` fitted
    on the features returned by ``load_whas500()``. The model must expose one
    coefficient per training sample and reach the reference score within
    an absolute tolerance of 1e-3.
    """
    x_full, y = load_whas500()

    trans = ClinicalKernelTransform()
    trans.fit(x_full)

    x = encode_categorical(standardize(x_full))

    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel=trans.pairwise_kernel,
                                 max_iter=100, random_state=0)
    ssvm.fit(x.values, y)

    self.assertFalse(ssvm._pairwise)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(x.values, y)
    # With delta, the assertion passes when abs(c - reference) <= delta.
    self.assertAlmostEqual(c, 0.83699051218246412, delta=1e-3)
def test_standardize_non_numeric(self):
    """``column.standardize`` on ``NON_NUMERIC_DATA_FRAME`` returns an equal DataFrame."""
    result = column.standardize(NON_NUMERIC_DATA_FRAME)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only reports "False is not true".
    self.assertIsInstance(result, pandas.DataFrame)
    tm.assert_frame_equal(NON_NUMERIC_DATA_FRAME, result)
def setUp(self):
    """Store standardized, categorically encoded features and the outcome.

    ``self.x`` receives the features returned by ``load_whas500()`` after
    ``standardize`` followed by ``encode_categorical``; ``self.y`` receives
    the second return value of the loader.
    """
    features, self.y = load_whas500()
    standardized = standardize(features)
    self.x = encode_categorical(standardized)
def setUp(self):
    """Store standardized features in ``self.x`` and the outcome in ``self.y``.

    Both come from ``load_whas500()``; the features are passed through
    ``standardize`` before being stored.
    """
    features, self.y = load_whas500()
    self.x = standardize(features)
def test_predict_survival_function(self):
    """Check ``CoxPHSurvivalAnalysis.predict_survival_function`` on five rows.

    The model is fitted on ``self.x`` after ``standardize(..., with_std=False)``.
    For each selected row, the returned function is evaluated on the expected
    time grid; the values must be strictly decreasing and match the reference
    values.
    """
    xc = standardize(self.x, with_std=False)

    model = CoxPHSurvivalAnalysis()
    model.fit(xc, self.y)

    test_idx = [9, 3, 313, 122, 431]
    surv_fns = model.predict_survival_function(xc.values[test_idx, :])
    self.assertEqual(len(surv_fns), len(test_idx))

    # Integer grid 1..52 without the values 29, 41 and 51.
    expected_x = numpy.r_[1:29, 30:41, 42:51, 52]
    assert_array_almost_equal(surv_fns[0].x, expected_x)

    # One row of reference values per entry of test_idx.
    expected_y = numpy.array([
        [
            0.997461059155262, 0.994921063628358, 0.992385735058868, 0.989851752006058,
            0.987316287353143, 0.984776787077271, 0.982229880405155, 0.969572939420418,
            0.964484914195169, 0.961943739897691, 0.956832876286024, 0.95164893059497,
            0.949061479987368, 0.941308786676055, 0.936108961524705, 0.930908109920408,
            0.923134421802298, 0.915290309566414, 0.910039421812024, 0.896950008976804,
            0.891710496383746, 0.889091087732452, 0.886464513020254, 0.876021834875649,
            0.868208744867335, 0.860295712033317, 0.855028100866882, 0.849763725779484,
            0.844479679508505, 0.841831970503126, 0.836536000803172, 0.831235646418842,
            0.825894605739379, 0.815224813877833, 0.807213878413131, 0.796552033034344,
            0.793873120526445, 0.788520538165122, 0.777873630457932, 0.772511632709885,
            0.761861634114672, 0.756514792077836, 0.751165903610347, 0.740464726897091,
            0.737784677797559, 0.732429416106706, 0.719080607292328, 0.71107749717937,
            0.700374533644241,
        ],
        [
            0.999043882016474, 0.998085846760273, 0.997128049274331, 0.996169233883962,
            0.995208324378805, 0.994244341388665, 0.993275988428092, 0.988440345477267,
            0.986485348400118, 0.985506531271212, 0.983533018718637, 0.981524558134704,
            0.980519526333744, 0.977497911740073, 0.975462571926908, 0.973419764375874,
            0.970353074837982, 0.967242232655452, 0.965150517551612, 0.95990330510734,
            0.957789534855612, 0.9567298847967, 0.955665378803038, 0.951413568664925,
            0.948211695979523, 0.944950494817014, 0.942769166879369, 0.940580788681338,
            0.938375713299261, 0.937267565568928, 0.935044498941264, 0.932810785284511,
            0.930550920812338, 0.926008946520639, 0.922574381856697, 0.917970142218167,
            0.916807238599598, 0.914476355200083, 0.909810449176314, 0.907445513776142,
            0.902717737080621, 0.900328600224742, 0.897927987577297, 0.893093037207259,
            0.891875336188489, 0.88943384158804, 0.883299174284255, 0.879587091746084,
            0.874581779605126,
        ],
        [
            0.997047493865252, 0.994094987736893, 0.991149132479536, 0.988206067094847,
            0.985262510789073, 0.982315505128104, 0.979361149485527, 0.964697962626958,
            0.958812207831876, 0.955874508145704, 0.949969979834156, 0.943986274173002,
            0.941001628199154, 0.932066788913148, 0.926080800490561, 0.920099053753847,
            0.91116833279606, 0.902169133142995, 0.896152043528463, 0.881177346924239,
            0.875193135907483, 0.872203575284039, 0.869207278320275, 0.857309023710775,
            0.848421992818626, 0.839434570137547, 0.833459215086135, 0.827493527632619,
            0.821511608241697, 0.818516504870906, 0.812530287850531, 0.806545301458209,
            0.800520667700624, 0.788504317779599, 0.779499208968453, 0.76753682302423,
            0.764535224692751, 0.758542854366408, 0.746643087255005, 0.740660154164664,
            0.728796971370325, 0.722851218124105, 0.716910044469078, 0.705044665474413,
            0.702077410385152, 0.696153525886713, 0.681418170940623, 0.67260509515153,
            0.660844217439567,
        ],
        [
            0.999277487981792, 0.998553357599042, 0.997829237091254, 0.99710417672379,
            0.996377361626995, 0.995648049432879, 0.994915257060425, 0.991253310198176,
            0.989771583274112, 0.989029450846354, 0.9875325990774, 0.986008486326716,
            0.985245535623559, 0.982950582288741, 0.981403739445045, 0.979850427761197,
            0.977517078995485, 0.9751482936019, 0.973554486943764, 0.969552590992207,
            0.967938970293575, 0.967129721757568, 0.966316544163349, 0.963066369097215,
            0.960616445221447, 0.958119046496724, 0.956447429333005, 0.954769459155805,
            0.953077720742534, 0.952227180730954, 0.950520162020759, 0.948803968432749,
            0.947066660332597, 0.943571819475399, 0.9409262972535, 0.937376036066338,
            0.936478650448705, 0.934679123812276, 0.931073502063006, 0.929244251313332,
            0.925583871649869, 0.923732352300608, 0.921870729568298, 0.918117633409758,
            0.917171619503635, 0.915273906739705, 0.910499952666305, 0.907607302382901,
            0.903702170040254,
        ],
        [
            0.998608413113071, 0.997214644411471, 0.995821831013303, 0.994428148000897,
            0.993032034348724, 0.991632072399662, 0.99022638680066, 0.983216197230745,
            0.980386484787682, 0.978970679590968, 0.976118050823751, 0.97321758347997,
            0.971767207148281, 0.967410751583306, 0.964479728136077, 0.961540752050541,
            0.957133998596443, 0.952670279630427, 0.949672572311715, 0.942165647657932,
            0.93914685715923, 0.937634655006455, 0.936116291486253, 0.930059391448493,
            0.925506309233532, 0.920876059409491, 0.917783073342697, 0.91468336624063,
            0.911563331503877, 0.909996636973231, 0.906856220364748, 0.903704188812084,
            0.900518754266508, 0.89412719205641, 0.889303474943224, 0.882849825667986,
            0.881222141750833, 0.877962502029932, 0.871448809785976, 0.868153120346739,
            0.861576393621591, 0.85825886866239, 0.854929447472668, 0.848236143873358,
            0.846553007760733, 0.84318146930209, 0.834728527145727, 0.82962662516592,
            0.822762829346384,
        ],
    ])

    for row, surv_fn in enumerate(surv_fns):
        actual_y = [surv_fn(t) for t in expected_x]

        # check that values decrease
        strictly_decreasing = (numpy.diff(actual_y) < 0).all()
        self.assertTrue(strictly_decreasing)

        assert_array_almost_equal(actual_y, expected_y[row, :])
def test_standardize_numeric(numeric_data):
    """Standardizing the numeric frame yields a DataFrame with the expected values."""
    frame, expected = numeric_data

    standardized = column.standardize(frame)

    assert isinstance(standardized, pandas.DataFrame)
    assert_array_almost_equal(expected, standardized)
def test_standardize_int_numpy_array(numeric_data):
    """Standardizing the frame's values cast to ``int`` yields the expected ndarray."""
    frame, expected = numeric_data
    int_values = frame.values.astype(int)

    standardized = column.standardize(int_values)

    assert isinstance(standardized, numpy.ndarray)
    assert_array_almost_equal(expected, standardized)
def test_standardize_non_numeric(non_numeric_data_frame):
    """Standardizing a non-numeric frame returns a DataFrame equal to the input."""
    standardized = column.standardize(non_numeric_data_frame)

    assert isinstance(standardized, pandas.DataFrame)
    tm.assert_frame_equal(non_numeric_data_frame, standardized)