def logreg_sex_and_class():
    """Fit a logistic regression on sex, class and their interactions.

    Reads the module-level ``data`` array: column 0 is the target, column 1
    the class (values 1, 2, 3) and column 2 the sex indicator.  Class is
    expanded into two binary columns (1st class, 2nd class); 3rd class is
    the baseline where both are 0.  The design matrix columns are, in order,
    ``[1, cl1, cl2, sex, sex*cl1, sex*cl2]``.

    The model is fitted with ``gradientdescent(..., logreg=True)`` using
    alpha=0.1, 40000 iterations and lam=0.  The function then prints the
    fitted parameters, plots the cost values returned by ``gradientdescent``
    against the iteration number, prints ``glog(theta . x)`` for every
    class/sex combination (the predicted survival table) and finally the
    time elapsed since the module-level ``start``.  Nothing is returned.
    """
    def report(*parts):
        # Space-joined output: identical to the Python 2 statement
        # `print a, b, ...` while also being valid Python 3 syntax.
        print(" ".join(str(part) for part in parts))

    y = data[::, 0]
    m = np.size(y)
    y = y.reshape([m, 1])
    xs = data[::, 2].reshape([m, 1])
    xcl = data[::, 1].reshape([m, 1])

    # Convert class into two separate binary columns.  The comparison yields
    # a boolean array, which must be converted element-wise with astype();
    # int() only accepts size-1 arrays and raised TypeError here before.
    xcl1 = (xcl == 1).astype(int)  # 1 only if 1st class, 0 otherwise
    xcl2 = (xcl == 2).astype(int)  # 1 only if 2nd class, 0 otherwise
    xones = np.ones([m, 1])
    report(np.shape(xcl2), np.shape(xs))

    # Interaction terms sex*cl1 and sex*cl2.  All operands are (m, 1)
    # columns, so the element-wise product is already an (m, 1) column.
    x1int = xs * xcl1
    x2int = xs * xcl2
    x = np.hstack([xones, xcl1, xcl2, xs, x1int, x2int])  # full x array

    alpha = 0.1
    niter = 40000
    lam = 0
    graddes = gradientdescent(x, y, alpha, niter, lam, logreg=True)
    thetapred = graddes[0]
    Jsteps = graddes[1]
    report("prediction for theta:", thetapred)

    scatter(np.arange(niter) + 1, Jsteps)
    xlabel("Number of iterations")
    ylabel("Jcost")
    title("The convergence of the cost function")
    show()

    # Generate the predicted survival table.  Each row must follow the same
    # column order as the design matrix built above.
    for cl in [1, 2, 3]:
        cl1 = int(cl == 1)
        cl2 = int(cl == 2)
        for sex in [0, 1]:
            report("class=", cl, "and sex=", sex)
            xx = np.array([1, cl1, cl2, sex, cl1 * sex, cl2 * sex])
            report(glog(np.dot(thetapred, xx)))
    report("Time elapsed:", time() - start)
def logreg_sexonly():
    """Fit a logistic regression with sex as the only predictor.

    Column 0 of the module-level ``data`` array is the target and column 2
    the sex indicator; a column of ones is prepended as the intercept.  The
    fit is done by ``gradientdescent(..., logreg=True)`` with alpha=0.1,
    10000 iterations and lam=0.

    Prints the shapes of the two design columns, the fitted parameters, and
    the values ``glog(theta . [1, 1])`` (female) and ``glog(theta . [1, 0])``
    (male); plots the cost values returned by ``gradientdescent`` against
    the iteration number; and prints the time elapsed since the module-level
    ``start``.  Nothing is returned.
    """
    def emit(*fields):
        # Same text as the Python 2 statement `print a, b, ...`, written so
        # that it is also valid Python 3 syntax.
        print(" ".join(str(field) for field in fields))

    target = data[::, 0]
    m = np.size(target)
    target = target.reshape([m, 1])
    sex_col = data[::, 2].reshape([m, 1])
    bias_col = np.ones([m, 1])
    emit(np.shape(bias_col), np.shape(sex_col))
    design = np.hstack([bias_col, sex_col])

    alpha, niter, lam = 0.1, 10000, 0
    fit = gradientdescent(design, target, alpha, niter, lam, logreg=True)
    theta = fit[0]
    cost_history = fit[1]
    emit("prediction for theta:", theta)

    # Cost against the 1-based iteration number.
    scatter(np.arange(niter) + 1, cost_history)
    xlabel("Number of iterations")
    ylabel("Jcost")
    title("The convergence of the cost function")
    show()

    # Predicted survival probabilities: [intercept, sex] with sex=1 for
    # females and sex=0 for males.
    female_row = np.array([1, 1])
    male_row = np.array([1, 0])
    prob_female = glog(np.dot(theta, female_row))
    prob_male = glog(np.dot(theta, male_row))
    emit(prob_female, prob_male)
    emit("Time elapsed:", time() - start)
def logreg_sex_class_city():
    """Fit a logistic regression on sex, class and city with interactions.

    Reads the module-level ``data`` array: column 0 is the target, column 1
    the class (1, 2, 3), column 2 the sex indicator and column 7 the city
    code (0, 1, 2; printed as "emb").  Class and city are each expanded into
    two binary columns; class 3 and city code 2 are the baselines.

    Design matrix columns, in order:
    ``[1, cl1, cl2, sex, em1, em2]``, then the eight pairwise products
    ``sex*cl1, sex*cl2, sex*em1, sex*em2, cl1*em1, cl2*em1, cl1*em2,
    cl2*em2``, then the four triple products ``sex*cl1*em1, sex*cl2*em1,
    sex*cl1*em2, sex*cl2*em2``.

    The fit uses ``gradientdescent(..., logreg=True)`` with alpha=0.3, 50000
    iterations and lam=0.  The function prints the fitted parameters, plots
    the returned cost values against the iteration number, prints
    ``glog(theta . x)`` for every class/city/sex combination and finally the
    time elapsed since the module-level ``start``.  Nothing is returned.
    """
    def emit(*fields):
        # Same text as the Python 2 statement `print a, b, ...`, written so
        # that it is also valid Python 3 syntax.
        print(" ".join(str(field) for field in fields))

    def column_product(*cols):
        # Multiply columns element-wise on their flattened (m,) form, left
        # to right, and return the result as an (m, 1) column again.
        flat = cols[0].reshape(m)
        for col in cols[1:]:
            flat = flat * col.reshape(m)
        return flat.reshape([m, 1])

    target = data[::, 0]
    m = np.size(target)
    target = target.reshape([m, 1])
    sex_col = data[::, 2].reshape([m, 1])
    class_col = data[::, 1].reshape([m, 1])
    city_col = data[::, 7].reshape([m, 1])

    # Binary indicator columns; the third level of each factor is implied
    # when both indicators are 0.
    is_cl1 = (class_col == 1).astype(int)
    is_cl2 = (class_col == 2).astype(int)
    is_em1 = (city_col == 0).astype(int)
    is_em2 = (city_col == 1).astype(int)
    bias_col = np.ones([m, 1])

    # Quadratic interactions.
    pairwise = np.hstack([
        column_product(sex_col, is_cl1),
        column_product(sex_col, is_cl2),
        column_product(sex_col, is_em1),
        column_product(sex_col, is_em2),
        column_product(is_cl1, is_em1),
        column_product(is_cl2, is_em1),
        column_product(is_cl1, is_em2),
        column_product(is_cl2, is_em2),
    ])
    # Cubic interactions.
    threeway = np.hstack([
        column_product(sex_col, is_cl1, is_em1),
        column_product(sex_col, is_cl2, is_em1),
        column_product(sex_col, is_cl1, is_em2),
        column_product(sex_col, is_cl2, is_em2),
    ])
    design = np.hstack([bias_col, is_cl1, is_cl2, sex_col, is_em1, is_em2,
                        pairwise, threeway])
    # Note: running with the cubic terms on and off made virtually no
    # difference in the results; perhaps only the linear and quadratic
    # terms are necessary.

    alpha, niter, lam = 0.3, 50000, 0
    fit = gradientdescent(design, target, alpha, niter, lam, logreg=True)
    theta = fit[0]
    cost_history = fit[1]
    emit("prediction for theta:", theta)

    # Cost against the 1-based iteration number.
    scatter(np.arange(niter) + 1, cost_history)
    xlabel("Number of iterations")
    ylabel("Jcost")
    title("The convergence of the cost function")
    show()

    # Predicted survival table.  Every row follows the column order of the
    # design matrix built above.
    for cl in [1, 2, 3]:
        c1 = int(cl == 1)
        c2 = int(cl == 2)
        for em in [0, 1, 2]:
            e1 = int(em == 0)
            e2 = int(em == 1)
            for sex in [0, 1]:
                emit("class=", cl, "and sex=", sex, "and emb=", em)
                row = np.array([
                    1, c1, c2, sex, e1, e2,
                    c1 * sex, c2 * sex, e1 * sex, e2 * sex,
                    c1 * e1, c2 * e1, c1 * e2, c2 * e2,
                    sex * c1 * e1, sex * c2 * e1,
                    sex * c1 * e2, sex * c2 * e2,
                ])
                emit(glog(np.dot(theta, row)))
    emit("Time elapsed:", time() - start)
# Write hypothesis X1 = sympy.symbols('X1') X1 = (X1 - means[0])/stds[0] XN = np.array([1,X1]) hypz = np.dot(thetapred,XN) # the hypothesis is hyp = glog(hypz) print "hypothesis for z (take glog(z) for actual hypothesis prob): ", hypz # Plot the hypothesis CHD probability ages = (np.arange(20,81,1)).reshape(61,1) agesfs = (ages-means[0])/stds[0] na = np.size(agesfs) X0 = np.ones([na,1]) Xa = np.hstack([X0, agesfs]) #print Xa chd = glog( np.dot( Xa, thetapred.T) ) plot1 = plot(ages,chd, 'b') xlim(20,80) ylim(-0.1,1.3) xlabel("Age") ylabel("Probability of CHD") title("Hypothesis probability of CHD by age") plot2 = plot(x,y,'ro') # plot the original data #legend([plot1], ("blue line"), "best", numpoints = 1) show() print "Time elapsed:", time() - start