def looRLS(XPath, yPath, metaPath):
    """Tune the RLS regularization parameter by leave-one-out, then score on hidden data.

    The data are read with ``readAuto`` and divided by ``hidden.split`` into a
    training part and a hidden part. For every exponent in ``range(5, 25)``
    the learner is re-solved with ``2 ** exponent``, its leave-one-out
    predictions are scored with ``cindex`` against the training labels, and
    the best-scoring exponent is kept (the earliest one wins on ties). The
    model for that exponent is finally scored on the hidden part. All results
    are printed; nothing is returned.

    Args:
        XPath: Passed to ``readAuto`` as the feature source.
        yPath: Passed to ``readAuto`` as the label source.
        metaPath: Passed to ``result.getMeta`` to load metadata for the
            split, or ``None`` to split with empty metadata.

    Raises:
        ValueError: If no candidate scores above the initial -1 sentinel
            (for example when every score is NaN).
    """
    X, Y = readAuto(XPath, yPath)
    meta = {}
    if metaPath is not None:
        print("Loading metadata from %s" % (metaPath,))
        meta = result.getMeta(metaPath)
    X_train, X_hidden, Y_train, Y_hidden = hidden.split(X, Y, meta=meta)

    rls = RLS.createLearner(
        train_features=X_train,
        train_labels=Y_train,
        regparam=1.0,
    )
    rls.train()

    bestperf = -1.
    bestlogrp = None
    for logrp in range(5, 25):
        rls.solve(2. ** logrp)
        Ploo = rls.computeLOO()
        perf = cindex(Y_train, Ploo)
        print("Leave-one-out %f for lambda 2^%d" % (perf, logrp))
        if perf > bestperf:
            bestperf = perf
            bestlogrp = logrp
    if bestlogrp is None:
        # Previously this surfaced as an UnboundLocalError on the next line.
        raise ValueError("no regularization candidate produced a usable score")

    print("Best leave-one-out %f for lambda 2^%d" % (bestperf, bestlogrp))
    # Re-solve with the winning parameter before extracting the model.
    rls.solve(2. ** bestlogrp)
    model = rls.getModel()
    P = model.predict(X_hidden)
    perf = cindex(Y_hidden, P)
    print("final performance: %f" % perf)
def nfoldRLS(X, Y, fcount):
    """Tune the RLS regularization parameter by N-fold cross-validation.

    For every exponent in ``range(5, 25)`` the learner is re-solved with
    ``2 ** exponent``; hold-out predictions for each fold are obtained with
    ``rls.computeHO`` and scored with ``cindex``, and the mean over folds is
    the candidate's score. The best-scoring exponent is kept (the earliest
    one wins on ties) and the learner is re-solved with it.

    Args:
        X: Training features handed to ``RLS.createLearner``.
        Y: Training labels; must support ``len`` and indexing by the fold's
            test indices.
        fcount: Number of folds, passed to ``KFold`` as ``n_folds``.

    Returns:
        The model from ``rls.getModel()`` after solving with the best
        regularization parameter.

    Raises:
        ValueError: If no candidate scores above the initial -1 sentinel
            (for example when every mean score is NaN).
    """
    rls = RLS.createLearner(train_features=X, train_labels=Y)
    rls.train()

    # The fold generator's arguments (including the fixed seed) do not depend
    # on the regularization parameter, so build the folds once and reuse them
    # for every candidate instead of reconstructing KFold on each iteration.
    folds = list(KFold(len(Y), n_folds=fcount, indices=True, shuffle=True,
                       random_state=77))

    bestperf = -1.
    bestlogrp = None
    for logrp in range(5, 25):
        rls.solve(2. ** logrp)
        perfs = [cindex(Y[test], rls.computeHO(test)) for _train, test in folds]
        perf = np.mean(perfs)
        print("N-fold CV %f for lambda 2^%d" % (perf, logrp))
        if perf > bestperf:
            bestperf = perf
            bestlogrp = logrp
    if bestlogrp is None:
        # Previously this surfaced as an UnboundLocalError on the next line.
        raise ValueError("no regularization candidate produced a usable score")

    print("Best N-fold CV %f for lambda 2^%d" % (bestperf, bestlogrp))
    # Re-solve with the winning parameter before extracting the model.
    rls.solve(2. ** bestlogrp)
    model = rls.getModel()
    return model
def testRLS(input):
    """Train RLS on an svmlight file with the leading 10% of rows held out.

    The file is loaded with ``svmlight_format.load_svmlight_file``. The first
    ``int(0.1 * len(Y))`` rows form the hold-out set and the remaining rows
    the training set. For every exponent in ``range(-5, 5)`` the learner is
    re-solved with ``2 ** exponent`` and its leave-one-out predictions are
    scored with ``cindex`` on the training labels; the best exponent is kept
    (the earliest one wins on ties) and used for the final solve.

    Args:
        input: Source passed to ``load_svmlight_file``. The name shadows the
            builtin but is kept because it is part of the signature.

    Raises:
        ValueError: If no candidate scores above the initial -1 sentinel
            (for example when every score is NaN).
    """
    X, Y = svmlight_format.load_svmlight_file(input)

    n_total = len(Y)
    n_holdout = int(0.1 * n_total)
    hoindices = list(range(n_holdout))
    # The hold-out set is a prefix, so its complement is simply the remaining
    # suffix; no need for a set difference whose iteration order is unspecified.
    hocompl = list(range(n_holdout, n_total))

    trainX = X[hocompl]
    testX = X[hoindices]
    trainY = Y[hocompl]
    testY = Y[hoindices]
    print("%d %d" % (len(trainY), len(testY)))

    rls = RLS.createLearner(train_features=trainX, train_labels=trainY)
    rls.train()

    bestperf = -1.0
    bestlogrp = None
    for logrp in range(-5, 5):
        rls.solve(2.0 ** logrp)
        Ploo = rls.computeLOO()
        perf = cindex(trainY, Ploo)
        print("%s %s" % (logrp, perf))
        if perf > bestperf:
            bestperf = perf
            bestlogrp = logrp
    if bestlogrp is None:
        # Previously this surfaced as an UnboundLocalError on the next line.
        raise ValueError("no regularization candidate produced a usable score")

    rls.solve(2.0 ** bestlogrp)
    # NOTE(review): the hold-out predictions are computed but neither scored
    # against testY nor returned — confirm whether that is intentional.
    P = rls.getModel().predict(testX)
def testCindex(self):
    """Check ``cindex`` against ``slow_cindex`` and against hand-computed values."""
    # On random inputs cindex must agree with slow_cindex.
    labels = np.random.random(100)
    scores = np.random.random(100)
    self.assertAlmostEqual(cindex(labels, scores), slow_cindex(labels, scores))

    # Constant predictions carry no ranking information.
    labels = np.random.random(10000)
    scores = np.ones(10000)
    self.assertEqual(cindex(labels, scores), 0.5)

    # Small example with one tied label pair: 9 pairs.
    labels = np.array([1, 2, 3, 3, 4])
    exact = self.assertEqual
    approx = self.assertAlmostEqual
    cases = (
        (exact, [-4, 1, 5, 5, 7], 1.0),         # 0 inversions
        (approx, [-4, 1, 8, 5, 7], 8.0 / 9.0),  # 1 inversion
        (approx, [-4, 1, 8, 7, 7], 7.5 / 9.0),  # 1.5 inversions
        (exact, [10, 9, 8, 7, 6], 0.0),         # all wrong
        (exact, [10, 10, 10, 10, 10], 0.5),     # all tied
    )
    for check, raw_scores, expected in cases:
        scores = np.array(raw_scores)
        check(cindex(labels, scores), expected)

    # Passing the all-tied array as the labels as well must raise.
    tied = scores
    self.assertRaises(UndefinedPerformance, cindex, tied, tied)
def callback(self, learner):
    """Score the learner's current model on the stored test data and record it.

    Predictions for ``self.testX`` are obtained from ``learner.getModel()``,
    scored with ``cindex`` against ``self.testY``, and the score is appended
    to ``self.perfs``.
    """
    predictions = learner.getModel().predict(self.testX)
    score = cindex(self.testY, predictions)
    self.perfs.append(score)