# Simulate right-censored survival data and fit a log-hazard model to it.
# `m`, `data_filename` and `modelfilename` are expected to be defined earlier
# in the file.

# Censoring times are uniform on [0, 100).
censor_times = numpy.random.uniform(0.0, 100.0, size=m)


def baseline_hazard(t):
    """Return the true hazard at time(s) t: exp(sin(t) - 2)."""
    return numpy.exp(numpy.sin(t) - 2.0)


# NOTE(review): the meaning of the 10.0 / 20.0 arguments is defined by
# HazardSampler, which is not visible here -- confirm against its signature.
sampler = HazardSampler(baseline_hazard, 10.0, 20.0)
failure_times = numpy.array([sampler.draw() for _ in range(m)])

# The observed time is whichever of failure / censoring happens first.
y = numpy.minimum(failure_times, censor_times)
# c is 1.0 where the failure occurred before the censoring time, else 0.0.
c = 1.0 * (censor_times > failure_times)

# Pickle streams are binary data, so the file must be opened in binary mode.
with open(data_filename, 'wb') as outfile:
    pickle.dump((m, y, c, censor_times, failure_times), outfile)

pyplot.hist(y, bins=50)
pyplot.show()

# Time only the fit itself; saving the model is excluded from the timing.
t0 = time.time()
model = GeneralizedRegressor(
    base_regressor=Earth(thresh=1e-7, max_terms=100, smooth=True,
                         allow_linear=False, penalty=0),
    loss_function=MidpointLogHazardLossFunction(10))
model.fit(X=None, y=y, c=c)
t1 = time.time()

with open(modelfilename, 'wb') as outfile:
    pickle.dump(model, outfile)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Total fitting time: %f seconds' % (t1 - t0))

# Compare the fitted log hazard with the true one on a regular time grid.
t = numpy.arange(0.0, 30.0, .1)
predicted_log_hazard = model.predict(X=None, t=t)
actual_log_hazard = numpy.sin(t) - 2.0

pyplot.figure()
pyplot.plot(t, actual_log_hazard, 'r', label='actual log hazard')
pyplot.plot(t, predicted_log_hazard, 'b', label='predicted log hazard')
# Without a legend the labels set above are never displayed.
pyplot.legend()
pyplot.show()

# Diagnostics are best-effort: report why they are unavailable instead of
# silently swallowing every error (including KeyboardInterrupt).
try:
    print(model.regressor_.trace())
    print(model.regressor_.summary())
except Exception as exc:
    print('Model diagnostics unavailable: %s' % exc)
# Turn every floating-point warning (overflow, divide, invalid, ...) into an
# exception so numerical problems fail loudly.
numpy.seterr(all='raise')

m = 1000  # number of observations
n = 10    # number of predictor columns (rebound to per-row trial counts below)
p = 10    # number of non-constant basis terms


def earth_basis(X, vars, parents, knots, signs):
    """Build a matrix of hinge-function basis terms from X.

    Column 0 of the result is the constant 1.  Column ``i + 1`` is the
    product of an earlier column (``parents[i]``) and the hinge
    ``max(signs[i] * (X[:, vars[i]] - knot), 0)``, where ``knot`` is the
    ``knots[i]``-th smallest value of ``X[:, vars[i]]``.

    Parameters
    ----------
    X : 2-d array of predictors.
    vars : integer array; ``vars[i]`` is the column of X used by term i.
    parents : integer sequence; ``parents[i]`` is the already-built column
        of the result that term i multiplies.
    knots : integer sequence; ``knots[i]`` is the rank (index into the
        sorted column) of the knot for term i.
    signs : sequence of multipliers applied inside the hinge.

    Returns
    -------
    Array of shape ``(X.shape[0], len(vars) + 1)``.
    """
    p = vars.shape[0]
    # Size the output from X itself rather than from the module-level `m`,
    # so the function works for any number of rows.
    B = numpy.empty(shape=(X.shape[0], p + 1))
    B[:, 0] = 1.0
    for i in range(p):
        column = X[:, vars[i]]
        knot = numpy.sort(column)[knots[i]]
        hinge = numpy.maximum(signs[i] * (column - knot), 0.0)
        B[:, i + 1] = B[:, parents[i]] * hinge
    return B


# Fixed seed so the simulated data set is reproducible.
numpy.random.seed(1)
X = numpy.random.normal(size=(m, n))

# One uniformly chosen predictor column per term (argmax of a one-hot draw).
vars = numpy.argmax(
    numpy.random.multinomial(1, (1.0 / float(n)) * numpy.ones(n), p), 1)
# Knot ranks, kept at least 6 observations away from either extreme.
knots = numpy.random.randint(6, m - 6, size=p)
# Parent of term i is one of columns 0..i; the small success probability
# makes column 0 (the constant) the most likely parent.
parents = numpy.array([numpy.random.binomial(i, 1.0 / float(p ** 2))
                       if i > 0 else 0 for i in range(p)])
# NOTE(review): these signs are drawn from {0, 1}; a sign of 0 makes the
# corresponding basis column identically zero.  If mirrored hinges (-1 / +1)
# were intended this needs changing -- confirm.
signs = numpy.random.binomial(1, .5, size=p)

B = earth_basis(X, vars, parents, knots, signs)
beta = numpy.random.uniform(-2.0, 2.0, size=p + 1)
eta = numpy.dot(B, beta)  # true linear predictor

model = GeneralizedRegressor(base_regressor=Earth(),
                             loss_function=BinomialLossFunction(LogitLink()))

# From here on `n` holds the number of binomial trials for each row (1..9).
n = numpy.random.randint(1, 10, size=m)
mu = 1.0 / (1.0 + numpy.exp(-eta))  # inverse logit of the linear predictor
y = numpy.random.binomial(n, mu)
model.fit(X, y, n=n)

# pearsonr returns (correlation, p-value).  Comparing that pair directly to
# a float is always true on Python 2 and a TypeError on Python 3, so take
# the correlation coefficient explicitly.
correlation = scipy.stats.pearsonr(model.predict(X), eta)[0]
assert correlation > .99, correlation