row = [1] for j in range(len(data[0])): seg = [] if j == 3: seg.append(data[i,j]) row = row + seg continue for k in range(0,K): seg.append(N(data[i,j], k, knots[j])) row = row + seg[1:] new_matrix[i] = np.array(row) return new_matrix if __name__ == '__main__': K = 5 X, Y = hd.loaddata() knots = split(X, K) print knots H = cookdata(X, K, knots) np.savetxt('H.dat', H, delimiter=',') print H.shape print np.linalg.matrix_rank(H) beta = lg.logistic(H, Y) print beta error = 0 for i in range(len(H)): res = lg.predict(H[i], beta) if Y[i] != res: error += 1 print 'error ratio:', float(error)/len(X)
if outputs[i] == 1: sbp_chd.append(inputs[i, 0]) else: sbp_no_chd.append(inputs[i, 0]) return np.array(sbp_chd), np.array(sbp_no_chd) def weight(x, xi, t): return np.exp(-(x-xi)*(x-xi)/(2*t)) def parzen(x0, X): s = 0. for i in range(len(X)): s += weight(x0, X[i], 30) return s/(len(X)*30) def draw(sbp_chd, sbp_no_chd): predictors = np.arange(100., 220., 0.5) responses = np.zeros(len(predictors)) for i in range(len(predictors)): responses[i] = parzen(predictors[i], sbp_chd) plt.plot(predictors, responses) for i in range(len(predictors)): responses[i] = parzen(predictors[i], sbp_no_chd) plt.plot(predictors, responses) if __name__ == '__main__': inputs, outputs = CHD.loaddata() sbp_chd, sbp_no_chd = cookdata(inputs, outputs) draw(sbp_chd, sbp_no_chd) plt.show()