예제 #1
0
파일: test.py 프로젝트: jcrudy/grm
 def test_binomial(self):
     """Fit a binomial/logit model to simulated counts and run the fixture's assertion.

     The success probability is the inverse logit of ``self.eta``; the
     number of trials per observation is drawn uniformly from 1..9.
     """
     regressor = GeneralizedRegressor(
         base_regressor=self.base_regressor(),
         loss_function=BinomialLossFunction(LogitLink()),
     )
     # NOTE(review): `m` is not defined in this method -- presumably a
     # module-level sample count matching len(self.eta); confirm.
     trials = numpy.random.randint(1, 10, size=m)
     # Inverse logit link: map the linear predictor to a probability.
     success_prob = 1.0 / (1.0 + numpy.exp(-self.eta))
     successes = numpy.random.binomial(trials, success_prob)
     regressor.fit(self.X, successes, n=trials)
     assert self.assertion(regressor)
예제 #2
0
파일: test.py 프로젝트: jcrudy/grm
    def test_log_hazard(self):
        """Fit a log-hazard model on randomly censored times taken from ``self.X``.

        The last column of ``self.X`` is used as the event time and the
        remaining columns as predictors. The test only checks that fitting
        completes; it makes no assertion about the fitted model.
        """
        # Firstly, assume the last column of X is time.  Slice along axis 1
        # so that y has one entry per row (sample) of the remaining X.
        y = self.X[:, -1]
        X = self.X[:, :-1]

        # Censor some of the times in a non-informative manner
        censor_times = numpy.random.uniform(y.min(), y.max(), size=y.shape[0])
        # c is 1.0 where the event was observed before its censoring time.
        c = 1.0 * (y < censor_times)
        y = numpy.minimum(y, censor_times)

        # Fit the model
        model = GeneralizedRegressor(base_regressor=self.base_regressor(),
                                     loss_function=LogHazardLossFunction())
        model.fit(X, y, c=c)
예제 #3
0
# Reuse the cached synthetic survival data when it exists (unless `redo`
# requests a rebuild); otherwise simulate it and cache it for the next run.
# NOTE: unpickling is only acceptable because this script writes the cache
# file itself -- never point data_filename at untrusted input.
if os.path.exists(data_filename) and not redo:
    # Pickle streams are binary data, so the file is opened in binary mode.
    with open(data_filename, 'rb') as infile:
        m, y, c, censor_times, failure_times = pickle.load(infile)
else:
    # NOTE(review): `m` (the sample count) must already be defined before
    # this point for this branch to run -- confirm against the full script.
    censor_times = numpy.random.uniform(0.0, 100.0, size=m)
    baseline_hazard = lambda t: numpy.exp(numpy.sin(t) - 2.0)
    # NOTE(review): the meaning of the 10.0 / 20.0 arguments is defined by
    # HazardSampler, which is not visible here.
    sampler = HazardSampler(baseline_hazard, 10.0, 20.0)
    failure_times = numpy.array([sampler.draw() for _ in range(m)])
    # Observed time is whichever of failure / censoring happens first.
    y = numpy.minimum(failure_times, censor_times)
    # c is 1.0 where the failure was observed before censoring, else 0.0.
    c = 1.0 * (censor_times > failure_times)
    with open(data_filename, 'wb') as outfile:
        pickle.dump((m, y, c, censor_times, failure_times), outfile)

# Show the distribution of the observed times.
pyplot.hist(y, bins=50)
pyplot.show()

# Fit the model; only the fit itself is timed.
t0 = time.time()
model = GeneralizedRegressor(base_regressor=Earth(thresh=1e-7, max_terms=100, smooth=True, allow_linear=False, penalty=0), loss_function=MidpointLogHazardLossFunction(10))
model.fit(X=None,y=y,c=c)
t1 = time.time()

# Persist the fitted model (after the clock stops, so disk I/O is not
# reported as fitting time).
with open(modelfilename, 'wb') as outfile:
    pickle.dump(model, outfile)
print('Total fitting time: %f seconds' % (t1 - t0))

# Compare the predicted log hazard with the true one, sin(t) - 2, which is
# the log of the baseline hazard used to simulate the data.
t = numpy.arange(0.0, 30.0, .1)
predicted_log_hazard = model.predict(X=None, t=t)
actual_log_hazard = numpy.sin(t) - 2.0
pyplot.figure()
pyplot.plot(t, actual_log_hazard, 'r', label='actual log hazard')
pyplot.plot(t, predicted_log_hazard, 'b', label='predicted log hazard')
# Without a legend the labels above are never displayed.
pyplot.legend()
pyplot.show()
try:
    print model.regressor_.trace()
    print model.regressor_.summary()
예제 #4
0
# Make numpy raise on every floating-point error category instead of
# warning, so numerical problems stop the script immediately.
numpy.seterr(all='raise')

# Problem size: m rows and n columns for the simulated X, and p basis terms.
m, n, p = 1000, 10, 10
def earth_basis(X, vars, parents, knots, signs):
    """Build a hinge-function basis matrix from the columns of ``X``.

    Column 0 of the result is a constant 1.0. For each term ``i``, column
    ``i + 1`` is the parent column ``parents[i]`` multiplied by the hinge
    ``max(signs[i] * (X[:, vars[i]] - knot), 0)``, where ``knot`` is the
    ``knots[i]``-th smallest value of ``X[:, vars[i]]``.

    Parameters
    ----------
    X : 2-d array
        Data matrix, one row per observation.
    vars : 1-d integer array
        Column of ``X`` used by each term.
    parents : 1-d integer array
        Index of the already-built basis column each term multiplies;
        ``parents[i]`` must not exceed ``i``.
    knots : 1-d integer array
        Rank (index into the sorted column) of each term's knot.
    signs : 1-d array
        Multiplier applied to ``x - knot`` before clipping at zero.

    Returns
    -------
    2-d array of shape ``(X.shape[0], len(vars) + 1)``.
    """
    # Size the output from X itself rather than from a module-level global,
    # so the function works for any number of rows.
    n_rows = X.shape[0]
    n_terms = vars.shape[0]
    B = numpy.empty(shape=(n_rows, n_terms + 1))
    B[:, 0] = 1.0
    for i in range(n_terms):
        column = X[:, vars[i]]
        knot = numpy.sort(column)[knots[i]]
        hinge = numpy.maximum(signs[i] * (column - knot), 0.0)
        B[:, i + 1] = B[:, parents[i]] * hinge
    return B

# Simulate a random hinge-basis model and its linear predictor eta.
# The seed is fixed so the synthetic problem is reproducible.
numpy.random.seed(1)
X = numpy.random.normal(size=(m,n))
# One uniformly chosen column of X per basis term.
vars = numpy.argmax(numpy.random.multinomial(1, (1.0/float(n))*numpy.ones(n), p),1)
knots = numpy.random.randint(6, m-6, size=p)
parents = numpy.array([numpy.random.binomial(i, 1.0/float(p**2)) if i>0 else 0 for i in range(p)])
# NOTE(review): binomial(1, .5) yields 0 or 1, and a sign of 0 makes that
# basis column identically zero -- confirm whether -1/+1 was intended.
signs = numpy.random.binomial(1, .5, size=p)
B = earth_basis(X, vars, parents, knots, signs)
beta = numpy.random.uniform(-2.0,2.0,size=p+1)
eta = numpy.dot(B, beta)

model = GeneralizedRegressor(base_regressor=Earth(),
                             loss_function=BinomialLossFunction(LogitLink()))
# From here on `n` is the per-observation number of trials, no longer the
# column count of X.
n = numpy.random.randint(1, 10, size=m)
# Inverse logit link: success probability for each observation.
mu = 1.0 / (1.0 + numpy.exp(-eta))
y = numpy.random.binomial(n, mu)
model.fit(X, y, n=n)
# pearsonr returns (correlation, p-value); compare the correlation itself.
# Comparing the whole tuple to a float is vacuous on Python 2 and a
# TypeError on Python 3.
assert scipy.stats.pearsonr(model.predict(X), eta)[0] > .99
예제 #5
0
파일: test.py 프로젝트: jcrudy/grm
 def test_gaussian(self):
     """Fit the default-loss model to ``self.eta`` and run the fixture's assertion.

     No loss function is passed, so GeneralizedRegressor's default is used,
     and the target is the linear predictor itself.
     """
     regressor = GeneralizedRegressor(base_regressor=self.base_regressor())
     regressor.fit(self.X, self.eta)
     assert self.assertion(regressor)