def _fit_example(self, **kwargs):
    """Fit a ``CoxnetSurvivalAnalysis`` model on the example data set.

    The CSV at ``EXAMPLE_FILE`` is split into features and outcome via
    ``get_x_y`` using the ``"status"`` and ``"time"`` columns, with ``1``
    as the positive label.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to the ``CoxnetSurvivalAnalysis`` constructor.

    Returns
    -------
    tuple
        ``(x, y, model)``: the feature frame, the outcome returned by
        ``get_x_y``, and the fitted estimator.
    """
    example_data = pandas.read_csv(EXAMPLE_FILE)
    features, outcome = get_x_y(example_data, ["status", "time"], pos_label=1)

    model = CoxnetSurvivalAnalysis(**kwargs)
    # The estimator is fitted on the bare array, not the DataFrame.
    model.fit(features.values, outcome)

    return features, outcome, model
def test_alpha_too_small():
    """Fitting with a single, very small alpha must raise ``ArithmeticError``.

    A fixed subset of the one-hot encoded breast cancer data is fitted with
    ``alphas=[0.007295025406624247]`` and ``l1_ratio=1.0``; the fit is
    expected to fail with the "weights are too large" message.
    """
    X, y = load_breast_cancer()
    Xt = OneHotEncoder().fit_transform(X)

    # Row positions of the samples used for fitting.
    index = numpy.array([
        0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22,
        23, 25, 26, 27, 28, 29, 30, 31, 33, 34, 36, 39, 40, 41, 42, 43, 44, 45,
        46, 47, 48, 49, 51, 52, 53, 54, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66,
        68, 70, 71, 72, 75, 76, 78, 79, 80, 82, 84, 85, 86, 87, 88, 90, 91, 92,
        93, 94, 95, 98, 99, 100, 102, 103, 104, 105, 107, 108, 109, 110, 111,
        113, 114, 115, 116, 117, 118, 119, 120, 121, 124, 125, 126, 127, 128,
        130, 131, 132, 133, 135, 136, 137, 138, 139, 140, 143, 144, 145, 147,
        148, 150, 151, 153, 154, 155, 156, 157, 158, 160, 161, 164, 165, 166,
        167, 168, 169, 170, 171, 172, 174, 175, 177, 178, 180, 181, 182, 183,
        184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197,
    ])

    nn = CoxnetSurvivalAnalysis(alphas=[0.007295025406624247], l1_ratio=1.0)
    Xf, yf = Xt.iloc[index], y[index]

    # ``match`` is a regular expression: escape the dots so that the message
    # is matched literally instead of "." matching any character.
    expected_message = (
        r"Numerical error, because weights are too large\. "
        r"Consider increasing alpha\."
    )
    with pytest.raises(ArithmeticError, match=expected_message):
        nn.fit(Xf, yf)
def test_simple(self):
    """Fit a lasso path (``l1_ratio=1.0``) on a five-sample toy data set.

    The leading entries of ``alphas_`` and the matching columns of ``coef_``
    are compared against reference values; the reference coefficients are
    read from ``SIMPLE_COEF_FILE``.
    """
    # Builtin ``bool`` rather than ``numpy.bool``: the alias does not exist
    # in NumPy 1.24-1.26, while ``bool`` yields the same dtype everywhere.
    y = numpy.empty(dtype=[("D", bool), ("Y", numpy.float64)], shape=5)
    y["D"] = [True, False, False, True, False]
    y["Y"] = [7., 8., 11., 11., 23.]

    x = pandas.DataFrame({
        "F1": [1, 1, 1, 0, 0],
        "F2": [23, 43, 54, 75, 67],
        "F3": [120, 98, 78, 91, 79],
        "F4": [0.123, 0.541, 0.784, 0.846, 0.331],
    })

    coxnet = CoxnetSurvivalAnalysis(l1_ratio=1.0)
    coxnet.fit(x.values, y)

    expected_alphas = numpy.array([
        7.02666666666667, 6.40243696630484, 5.83366211207401,
        5.31541564828386, 4.84320877198972, 4.41295145312887,
        4.02091700863675, 3.66370982370111, 3.3382359405709,
        3.04167626017436, 2.77146212443153, 2.52525306776672,
        2.30091654511542, 2.09650946083909, 1.91026133856035,
        1.74055898614351, 1.5859325229961, 1.44504264866632,
        1.31666904246323, 1.19969979362274, 1.09312177046848,
        0.996011845149902, 0.907528897950459, 0.826906531910992,
        0.753446434665921, 0.686512329995589, 0.625524466706047,
        0.569954597101554, 0.519321401555745, 0.473186319551291,
        0.431149751078499, 0.392847595491192, 0.357948097841098,
        0.326148975375191, 0.297174799307102, 0.270774609184727,
        0.24671973919085, 0.22480183754923, 0.204831061881182,
        0.186634434881721, 0.170054346072885, 0.154947186657187,
        0.141182105646904, 0.128639876495421, 0.117211864413924,
        0.106799085428826, 0.0973113490299429, 0.0886664769834391,
        0.0807895915432809, 0.0736124668960205, 0.0670729382214382,
    ])
    n_expected = len(expected_alphas)

    # FIXME
    # Only the first ``n_expected`` fitted alphas are compared.
    assert_array_almost_equal(expected_alphas, coxnet.alphas_[:n_expected])

    coef = pandas.DataFrame(coxnet.coef_[:, :n_expected], dtype=float)
    expected_coef = pandas.read_csv(SIMPLE_COEF_FILE, header=None, skiprows=1)
    assert_columns_almost_equal(coef, expected_coef)
def test_breast_example(self):
    """Check the lasso path (``l1_ratio=1.0``) on the breast cancer data.

    After encoding the categorical columns, the fitted ``alphas_``,
    ``deviance_ratio_`` and ``coef_`` are compared against reference values;
    the reference coefficients come from ``BREAST_CANCER_COEFFICIENTS_FILE``.
    """
    raw_x, y = load_breast_cancer()
    x = column.encode_categorical(raw_x)

    model = CoxnetSurvivalAnalysis(l1_ratio=1.0)
    model.fit(x.values, y)

    reference_alphas = numpy.array([
        0.207764947265866, 0.189307681974955, 0.172490109262135,
        0.157166563357949, 0.143204319038428, 0.130482442022696,
        0.118890741498079, 0.108328815700004, 0.0987051822799425,
        0.0899364859290742, 0.0819467763944772, 0.0746668506343715,
        0.0680336534144775, 0.0619897311537413, 0.0564827342889011,
        0.051464963847614, 0.046892958302776, 0.0427271171295661,
        0.0389313578046448, 0.0354728032765984, 0.0323214972006479,
        0.0294501444711215, 0.0268338748043064, 0.0244500273239498,
        0.0222779542835891, 0.0202988422256499, 0.0184955490282766,
        0.0168524554284737, 0.0153553297355215, 0.0139912045628799,
        0.0127482645108893, 0.0116157438274312, 0.0105838331601337,
        0.00964359459245389, 0.00878688422772072, 0.00800628165059773,
        0.0072950256549955, 0.0066469556817389, 0.00605645845875073,
        0.00551841938157428, 0.00502817821311635, 0.00458148871890295,
        0.00417448188822764, 0.00380363242263169, 0.00346572820145532,
        0.00315784245998521, 0.00287730843921864, 0.00262169628767281,
        0.00238879201517371, 0.00217657831633235, 0.00198321709761059,
        0.00180703355663423, 0.00164650167585602, 0.00150023100492174,
        0.0013669546172544, 0.00124551813654232, 0.00113486973808373,
        0.00103405103838443, 0.000942188794098442, 0.000858487338411865,
        0.000782221689357606, 0.00071273127036839, 0.000649414188678556,
        0.000591722022016858, 0.00053915506843511, 0.00049125801812897,
        0.000447616009762226, 0.000407851037136367, 0.000371618675081733,
        0.000338605096211458, 0.000308524352698783, 0.00028111589953377,
        0.000256142337807075, 0.000233387358474159, 0.000212653868789829,
        0.000193762285185162, 0.000176548977800548, 0.000160864853202119,
        0.000146574063005757, 0.000133552827223371, 0.000121688362139862,
        0.000110877903434536, 0.000101027816085719, 9.20527833489927e-05,
        8.38750677843702e-05, 7.64238379317803e-05, 6.96345548028444e-05,
        6.34484128750348e-05,
    ])
    assert_array_almost_equal(reference_alphas, model.alphas_)

    reference_deviance_ratio = numpy.array([
        0, 0.00741462796207568, 0.0135178719105177, 0.0183232499901932,
        0.0221250029051101, 0.0251530137843965, 0.0275599035016693,
        0.0298664819929119, 0.033763232356598, 0.0374249162331977,
        0.0409637006907067, 0.0454486054162627, 0.0551615080395675,
        0.0651612844343542, 0.0736024993960834, 0.0808820441173129,
        0.0894426534710234, 0.0992239010000626, 0.108910229105339,
        0.121376204780063, 0.134004998770465, 0.145079557491685,
        0.156667501995989, 0.167543840680748, 0.178622131991811,
        0.189365153169168, 0.199027839424271, 0.20909726215036,
        0.218610320633419, 0.228024278642459, 0.238171883969976,
        0.248070501745195, 0.258480494697342, 0.268971907277929,
        0.280744803445048, 0.291329662029924, 0.300942928439923,
        0.309972153913063, 0.318315812887558, 0.325822700491885,
        0.332992506325249, 0.339665277042211, 0.345876707002969,
        0.351605625998246, 0.357206102668659, 0.362484660673399,
        0.367624391654207, 0.372275248793233, 0.37674043994605,
        0.380887801196039, 0.384795899779142, 0.388569806807258,
        0.392075171498793, 0.395375481018565, 0.398377579969751,
        0.400997300805061, 0.403375467852471, 0.405431976972633,
        0.407443593366561, 0.409668341757423, 0.411628734365416,
        0.413367576771339, 0.414896999887021, 0.416268233594787,
        0.417475290203319, 0.418554781508749, 0.419526121036389,
        0.420522904669104, 0.421455233639571, 0.422296101083462,
        0.423049677446171, 0.423716974236606, 0.424302533927477,
        0.424825925226932, 0.425286695396174, 0.425693415010937,
        0.426052733081791, 0.426369464812111, 0.426652822940747,
        0.42686317150694, 0.427072533094355, 0.427264216646862,
        0.427427314063872, 0.427573225735422, 0.427700379783919,
        0.427814235325525, 0.427912925916531, 0.427998148400703,
    ])
    assert_array_almost_equal(reference_deviance_ratio, model.deviance_ratio_)

    fitted_coef = pandas.DataFrame(model.coef_, index=x.columns, dtype=float)

    reference_coef = pandas.read_csv(BREAST_CANCER_COEFFICIENTS_FILE, index_col=0)
    # Replace the CSV header labels with positional integers so the columns
    # line up with the default column labels of ``fitted_coef``.
    n_reference_columns = reference_coef.shape[1]
    reference_coef.columns = numpy.arange(n_reference_columns)

    assert_columns_almost_equal(fitted_coef, reference_coef, 5)
def test_example_2_standardize(self):
    """Check an elastic net path (``l1_ratio=0.9``) fitted on standardized data.

    The estimator runs inside a pipeline behind a ``StandardScaler``. Its
    ``alphas_`` are compared with reference values, and its coefficients are
    divided by the scaler's ``scale_`` before being compared with the
    reference coefficients read from ``EXAMPLE_COEF_FILE``.
    """
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    example_data = pandas.read_csv(EXAMPLE_FILE)
    x, y = get_x_y(example_data, ["status", "time"], pos_label=1)

    reference_alphas = numpy.array([
        0.263066005037211, 0.239695946189997, 0.218402018960187,
        0.198999785536952, 0.18132119305624, 0.165213118007272,
        0.15053603994994, 0.137162833055499, 0.124977665003457,
        0.113874993697428, 0.103758653109983, 0.0945410203385227,
        0.0861422566576188, 0.0784896159941638, 0.0715168148356887,
        0.0651634581142872, 0.0593745160934302, 0.0540998477267133,
        0.0492937663601004, 0.0449146440159821, 0.0409245508315483,
        0.037288926528462, 0.0339762810682614, 0.0309579219007116,
        0.0282077054426604, 0.0257018106348284, 0.0234185326151886,
        0.0213380947218357, 0.0194424771970012, 0.0177152611085322,
        0.0161414861369557, 0.0147075209963485, 0.0134009453666594,
        0.0122104423148384, 0.0111257002729774, 0.0101373237244409,
        0.00923675182439653, 0.00841618424987191, 0.00766851363708906,
        0.00698726402087929, 0.00636653474297097, 0.00580094934331044,
        0.00528560899173708, 0.00481605005665997, 0.00438820544321646,
        0.0039983693660421, 0.00364316525153066, 0.00331951649156886,
        0.0030246197954287,
    ])

    standardizer = StandardScaler()
    model = CoxnetSurvivalAnalysis(l1_ratio=0.9)
    steps = [("standardize", standardizer), ("coxnet", model)]
    Pipeline(steps).fit(x.values, y)

    assert_array_almost_equal(reference_alphas, model.alphas_)

    reference_coef = pandas.read_csv(EXAMPLE_COEF_FILE.format("2-std"))

    # Rescale coefficients: divide each feature's row by that feature's
    # scale from the fitted scaler.
    rescaled = model.coef_ / standardizer.scale_[:, numpy.newaxis]
    fitted_coef = pandas.DataFrame(rescaled, columns=reference_coef.columns, dtype=float)

    assert_columns_almost_equal(fitted_coef, reference_coef, 5)
def test_pipeline_predict(func):
    """A pipeline with ``OneHotEncoder`` must predict like a directly fitted model.

    One estimator is fitted on data encoded with
    ``column.encode_categorical``; an identically configured estimator is
    fitted inside a pipeline that encodes the raw data with
    ``OneHotEncoder``. Both are trained on all rows but the first ten and
    queried on the first ten.

    Parameters
    ----------
    func : str
        Name of the prediction method looked up on both the estimator and
        the pipeline. Each returned item must expose ``x`` and ``y``.
    """
    X_str, y = load_breast_cancer()
    X_num = column.encode_categorical(X_str)

    est = CoxnetSurvivalAnalysis(alpha_min_ratio=0.0001, l1_ratio=1.0, fit_baseline_model=True)
    est.fit(X_num[10:], y[10:])

    pipe = make_pipeline(
        OneHotEncoder(),
        CoxnetSurvivalAnalysis(alpha_min_ratio=0.0001, l1_ratio=1.0, fit_baseline_model=True),
    )
    pipe.fit(X_str[10:], y[10:])

    est_pred = list(getattr(est, func)(X_num[:10]))
    pipe_pred = list(getattr(pipe, func)(X_str[:10]))

    # ``zip`` stops at the shorter input, so without this check a missing
    # (or empty) set of predictions would let the loop pass vacuously.
    assert len(est_pred) == len(pipe_pred)

    for s1, s2 in zip(est_pred, pipe_pred):
        assert_array_almost_equal(s1.x, s2.x)
        assert_array_almost_equal(s1.y, s2.y)