import time

import numpy as np
import theano
import theano.tensor as T
import lasagne
from sklearn.metrics import roc_auc_score

# Project-local helpers (floatX, init_weights, model00, model3, model).
# The actual module name/path is assumed here; only the alias `CF` appears in the code.
import CF


def soft_cascade_LR_1LNN(trX1, trY1, teX1, teY1, trX2, teX2, lambda_vector, K1):
    """Soft cascade of a logistic-regression first stage (trX2/teX2 features) and a
    one-hidden-layer NN second stage (trX1/teX1 features), trained jointly.

    The cascade probability is the product of the two stage outputs, and the
    regularizer penalizes the expected computation t1 + t2 * P(stage 1 fires).
    For each value in lambda_vector, returns prediction time, accuracy, F1 score
    and the number of test points forwarded to the second stage.
    """
    (N, D1) = trX2.shape
    D = trX1.shape[1]
    C = 2
    # ComputeComplexity (assumed to be defined elsewhere in this project) returns a
    # per-example cost estimate for a model with the given layer sizes.
    t1 = ComputeComplexity([D1, C])
    t2 = ComputeComplexity([D, K1, C])
    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz_first = np.zeros((len(lambda_vector), 1))
    for i, plambda in enumerate(lambda_vector):
        X = T.fmatrix()
        F = T.fmatrix()
        Y = T.fvector()
        # first-stage logistic-regression parameters:
        w_l = CF.init_weights((D1, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        # w_l.set_value(np.zeros((D1,)))
        # b_l.set_value(np.zeros((1,)))
        # second-stage one-hidden-layer NN parameters:
        w_h1 = CF.init_weights((D, K1))
        b1 = CF.init_weights((K1, ))
        w_o = CF.init_weights((K1, ))
        bo = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        pygx1 = CF.model00(F, w_l, b_l)
        pygx2 = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)
        pygx_final = pygx1 * pygx2
        yhat1 = (pygx1 > 0.5)
        yhat = (pygx2 > 0.5)
        # expected-computation regularizer:
        reg = T.mean(t1 + t2 * pygx1)
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final, Y)) + plambda * reg
        params = [w_l, b_l, w_h1, w_o, b1, bo]
        updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.001 * 5, rho=0.9, epsilon=1e-06)
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)
        train = theano.function(inputs=[X, F, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        reg_value = theano.function(inputs=[F], outputs=reg, allow_input_downcast=True)
        predict_first = theano.function(inputs=[F], outputs=yhat1, allow_input_downcast=True)
        predict_second = theano.function(inputs=[X], outputs=yhat, allow_input_downcast=True)
        max_iter = 300
        for j in range(max_iter):
            c = train(trX1, trX2, trY1)
            r = reg_value(trX2)
            print(c - plambda * r, plambda * r)
        # timed prediction for the first stage:
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX2)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz_first[i] = inds_test.shape[0]
        # check that we get 100 percent recall from the first stage
        inds_true = np.where(teY1 == 1)[0]
        int_result = np.intersect1d(inds_test, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_test.shape[0], inds_true.shape[0], int_result.shape[0]))
        r1 = int_result.shape[0] / inds_true.shape[0]
        p1 = int_result.shape[0] / inds_test.shape[0]
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" % (r1, p1, a1))
        # only first-stage positives are forwarded to the second stage:
        teX11 = teX1[inds_test, :]
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1
        teY2 = np.zeros(teY1.shape, dtype=int)
        teY2.fill(0)
        teY2[inds_test] = teQ2
        inds_second = np.where(teY2 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r2 = int_result.shape[0] / inds_true.shape[0]
        p2 = int_result.shape[0] / inds_second.shape[0]
        a2 = np.mean(teY1 == teY2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" % (r2, p2, a2))
        F1[i] = 2 * r2 * p2 / (r2 + p2)
        accuracy1[i] = a2
    return time1, accuracy1, F1, nnz_first
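
# ---------------------------------------------------------------------------
# Every routine in this file leans on the helper module imported as `CF`,
# whose source is not included here.  The class below is only a sketch of the
# interface those routines appear to assume (float casting, weight
# initialisation and three sigmoid-output model builders), reconstructed from
# the call sites; the real CF implementations, in particular the meaning of
# the two trailing dropout-style arguments of model3/model, may differ.  It is
# not used by the code below, which keeps calling CF.* directly.
# ---------------------------------------------------------------------------
class _CFInterfaceSketch(object):
    """Hypothetical stand-in mirroring the assumed interface of the CF module."""

    @staticmethod
    def floatX(x):
        # Cast to Theano's configured float type.
        return np.asarray(x, dtype=theano.config.floatX)

    @staticmethod
    def init_weights(shape):
        # Small-variance Gaussian initialisation wrapped in a shared variable.
        return theano.shared(np.asarray(np.random.randn(*shape) * 0.01,
                                        dtype=theano.config.floatX))

    @staticmethod
    def model00(X, w, b):
        # Logistic regression: P(y = 1 | x).
        return T.nnet.sigmoid(T.dot(X, w) + b)

    @staticmethod
    def model3(X, w_h, w_o, b_h, b_o, p_drop_input, p_drop_hidden):
        # One-hidden-layer network with a sigmoid output
        # (the dropout-style arguments are ignored in this sketch).
        h = T.nnet.relu(T.dot(X, w_h) + b_h)
        return T.nnet.sigmoid(T.dot(h, w_o) + b_o)

    @staticmethod
    def model(X, w_h1, w_h2, w_o, b1, b2, b_o, p_drop_input, p_drop_hidden):
        # Two-hidden-layer network with a sigmoid output
        # (the dropout-style arguments are ignored in this sketch).
        h1 = T.nnet.relu(T.dot(X, w_h1) + b1)
        h2 = T.nnet.relu(T.dot(h1, w_h2) + b2)
        return T.nnet.sigmoid(T.dot(h2, w_o) + b_o)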
def cascade_three_stage(trX1, trY1, teX1, teY1, trX2, teX2, trX3, teX3,
                        w_h1, w_h2, w_o, b1, b2, bo, v_h1, v_o, c1, co, plambda, a):
    """Soft-gated three-stage cascade: logistic regression (trX3/teX3 features) ->
    one-hidden-layer NN (trX2/teX2 features, weights v_*) -> two-hidden-layer NN
    (trX1/teX1 features, weights w_*).

    Only the first-stage LR parameters are trained here; the NN weights are taken
    as given.  `a` is the gate sharpness and plambda a sequence of regularization
    weights.  Returns per-lambda prediction time, accuracy, F1 and the number of
    points forwarded past the first and second stages.
    """
    (N, D) = trX3.shape
    lambda_vector = plambda
    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz_first = np.zeros((len(lambda_vector), 1))
    nnz_second = np.zeros((len(lambda_vector), 1))
    for i, plambda in enumerate(lambda_vector):
        X = T.fmatrix()
        F = T.fmatrix()
        E = T.fmatrix()
        Y = T.fvector()
        w_l = CF.init_weights((D, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        w_l.set_value(np.zeros((D, )))
        b_l.set_value(np.zeros((1, )))
        pygx1 = CF.model00(E, w_l, b_l)
        pygx2 = CF.model3(F, v_h1, v_o, c1, co, 0, 1)
        pygx = CF.model(X, w_h1, w_h2, w_o, b1, b2, bo, 0, 1)
        yhat1 = (pygx1 > 0.5)
        yhat2 = (pygx2 > 0.5)
        yhat = (pygx > 0.5)
        # soft gating function and gated cascade probability:
        f = lambda x, a: 1 / (1 + T.exp(-a * (x - 0.5)))
        pygx_final = ((1 - f(pygx1, a)) * pygx1
                      + (1 - f(pygx2, a)) * f(pygx1, a) * pygx2
                      + f(pygx1, a) * f(pygx2, a) * pygx)
        reg = T.mean(f(pygx1, a))
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final, Y)) + plambda * reg
        params = [w_l, b_l]
        updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.5, rho=0.9, epsilon=1e-06)
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)
        train = theano.function(inputs=[X, F, E, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        reg_value = theano.function(inputs=[E], outputs=reg, allow_input_downcast=True)
        predict_first = theano.function(inputs=[E], outputs=yhat1, allow_input_downcast=True)
        predict_second = theano.function(inputs=[F], outputs=yhat2, allow_input_downcast=True)
        predict_third = theano.function(inputs=[X], outputs=yhat, allow_input_downcast=True)
        max_iter = 500
        for j in range(max_iter):
            # c = train(trX1, trY1)
            c = train(trX1, trX2, trX3, trY1)
            # r = reg_value(trX1)
            r = reg_value(trX3)
            print(c - plambda * r, plambda * r)
            # cost = train(trX1, trY1)
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX3)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz_first[i] = inds_test.shape[0]
        # check that we get 100 percent recall from the first stage
        inds_true = np.where(teY1 == 1)[0]
        int_result = np.intersect1d(inds_test, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_test.shape[0], inds_true.shape[0], int_result.shape[0]))
        r1 = int_result.shape[0] / inds_true.shape[0]
        p1 = int_result.shape[0] / inds_test.shape[0]
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" % (r1, p1, a1))
        teX22 = teX2[inds_test, :]
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX22)
        end1 = time.clock()
        time1[i] += end1 - start1
        inds_test2 = np.where(teQ2 == 1)[0]
        nnz_second[i] = inds_test2.shape[0]
        teY2 = np.zeros(teY1.shape, dtype=int)
        teY2.fill(0)
        teY2[inds_test] = teQ2
        inds_second = np.where(teY2 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r2 = int_result.shape[0] / inds_true.shape[0]
        p2 = int_result.shape[0] / inds_second.shape[0]
        a2 = np.mean(teY1 == teY2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" % (r2, p2, a2))
        # teX1 = teX1[inds_test2,:]
        teX11 = teX1[inds_test[inds_test2], :]
        start1 = time.clock()
        for t in range(n_it):
            teQ3 = predict_third(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1
        teY3 = np.zeros(teY1.shape, dtype=int)
        teY3.fill(0)
        teY3[inds_test[inds_test2]] = teQ3
        accuracy1[i] = np.mean(teY1 == teY3)
        inds_third = np.where(teY3 == 1)[0]
        int_result2 = np.intersect1d(inds_third, inds_true)
        print("third stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_third.shape[0], inds_true.shape[0], int_result2.shape[0]))
        r3 = int_result2.shape[0] / inds_true.shape[0]
        p3 = int_result2.shape[0] / inds_third.shape[0]
        print("third stage: recall = %f, precision = %f, accuracy = %f" % (r3, p3, accuracy1[i]))
        F1[i] = 2 * r3 * p3 / (r3 + p3)
    return time1, accuracy1, F1, nnz_first, nnz_second
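
# ---------------------------------------------------------------------------
# The soft gate used above, f(p, a) = 1 / (1 + exp(-a * (p - 0.5))), tends to
# a hard 0.5 threshold as the sharpness `a` grows, and the cascade output is a
# mixture in which a later stage only contributes once every earlier gate has
# fired.  The helper below is a small NumPy rendering of that combination with
# toy probabilities (illustration only, independent of the Theano graph):
# ---------------------------------------------------------------------------
def _soft_gate_demo(a=20.0):
    def gate(p):
        # Soft threshold at 0.5; approaches a step function as `a` grows.
        return 1.0 / (1.0 + np.exp(-a * (p - 0.5)))

    # Toy per-example probabilities from the three stages.
    p1 = np.array([0.2, 0.9, 0.8])    # cheap logistic-regression stage
    p2 = np.array([0.6, 0.4, 0.9])    # one-hidden-layer NN stage
    p3 = np.array([0.5, 0.7, 0.95])   # two-hidden-layer NN stage
    f1, f2 = gate(p1), gate(p2)

    # Same combination as cascade_three_stage: stage 2 only matters when the
    # first gate fires, stage 3 only when the first two fire.
    p_final = (1 - f1) * p1 + (1 - f2) * f1 * p2 + f1 * f2 * p3
    return p_final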
def NN_pretraining_one(trX2, teP, teX1, teY1, K1):
    """Pretrain a one-hidden-layer NN with K1 hidden units on (trX2, teP) and report
    its test-time speed, accuracy and F1 on (teX1, teY1).

    Returns the trained weights (w_h1, w_o, b1, bo) together with the timing,
    accuracy and F1 diagnostics.
    """
    K1_vector = [K1]
    n_it = 10000
    time1 = np.zeros((len(K1_vector), 1))
    accuracy1 = np.zeros((len(K1_vector), 1))
    inds_true = np.where(teY1 == 1)[0]
    for i, K1 in enumerate(K1_vector):
        (N, D) = trX2.shape
        X = T.fmatrix()
        Y = T.fvector()
        w_h1 = CF.init_weights((D, K1))
        b1 = CF.init_weights((K1, ))
        w_o = CF.init_weights((K1, ))
        bo = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        pygx = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)
        # yhat_second = T.argmax(pygx, axis=1)
        yhat_second = (pygx > 0.5)
        # cost_second = T.mean(T.nnet.categorical_crossentropy(pygx, Y))
        cost_second = T.mean(T.nnet.binary_crossentropy(pygx, Y))
        params_second = [w_h1, w_o, b1, bo]
        # updates_second = lasagne.updates.adagrad(cost_second, params_second, learning_rate=1/4, epsilon=1e-06)
        updates_second = lasagne.updates.rmsprop(cost_second, params_second, learning_rate=0.01, rho=0.9, epsilon=1e-06)
        train_second = theano.function(inputs=[X, Y], outputs=cost_second,
                                       updates=updates_second, allow_input_downcast=True)
        predict_second = theano.function(inputs=[X], outputs=yhat_second, allow_input_downcast=True)
        sgd_iters = 400
        for j in range(sgd_iters):
            # train_second(trX2, teP)
            c = train_second(trX2, teP)
            print(c)
        start1 = time.clock()
        for t in range(n_it):
            teY3 = predict_second(teX1)
        end1 = time.clock()
        time1[i] += (end1 - start1)
        accuracy1[i] = np.mean(teY1 == teY3)
        inds_second = np.where(teY3 == 1)[0]
        int_result2 = np.intersect1d(inds_second, inds_true)
        print("nzs:%d,true nzs:%d,intersection:%d"
              % (inds_second.shape[0], inds_true.shape[0], int_result2.shape[0]))
        r2 = int_result2.shape[0] / inds_true.shape[0]
        p2 = int_result2.shape[0] / inds_second.shape[0]
        print("recall = %f, precision = %f, accuracy = %f" % (r2, p2, accuracy1[i]))
        F1 = 2 * r2 * p2 / (r2 + p2)
    return w_h1, w_o, b1, bo, time1, accuracy1, F1
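
# ---------------------------------------------------------------------------
# The weights returned by NN_pretraining_one match the second-stage arguments
# of cascade_two_stage and cascade_rw, so the pretrained network is presumably
# meant to be plugged in there.  The function below is a hypothetical wiring
# with random data, purely to illustrate shapes and argument order; all names
# and sizes are made up, and real experiments would load actual cheap/rich
# feature sets.
# ---------------------------------------------------------------------------
def _example_pipeline():
    rng = np.random.RandomState(0)
    N, D_cheap, D_rich, K1 = 1000, 20, 100, 50            # made-up sizes
    X_rich = rng.randn(N, D_rich).astype('float32')       # expensive second-stage features
    X_cheap = rng.randn(N, D_cheap).astype('float32')     # cheap first-stage features
    y = (rng.rand(N) > 0.9).astype('float32')             # sparse positive labels

    # Pretrain the one-hidden-layer NN on the rich features, then hand its
    # weights to the two-stage cascade trainer (train/test sets reused here).
    w_h1, w_o, b1, bo, t_pre, acc_pre, f1_pre = NN_pretraining_one(
        X_rich, y, X_rich, y, K1)
    return cascade_two_stage(X_rich, y, X_rich, y, X_cheap, X_cheap,
                             w_h1, w_o, b1, bo, plambda=[0.1], a=50)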
def tree_cascade_v1(trX, trY, teX, teY, trX1, teX1, trX2, teX2,
                    w_h1, w_h2, w_o, b1, b2, bo, v_h1, v_o, c1, co, plambda, a):
    """Tree-structured cascade: two cheap linear gates (on the trX1 and trX2 feature
    sets) jointly decide whether to consult a one-hidden-layer NN and, in turn, a
    two-hidden-layer NN (both on the trX features).

    All parameters are trained jointly; the regularizer is a kappa-weighted
    expected-cost term.  Returns per-lambda prediction time, accuracy, F1 and the
    number of first-stage positives.
    """
    lambda_vector = plambda
    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz = np.zeros((len(lambda_vector), 1))
    for i, plambda in enumerate(lambda_vector):
        (N, D1) = trX1.shape
        (N, D2) = trX2.shape
        X = T.fmatrix()
        Z = T.fmatrix()
        F = T.fmatrix()
        E = T.fmatrix()
        Y = T.fvector()
        w_l = CF.init_weights((D1, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        w_l.set_value(np.zeros((D1, )))
        b_l.set_value(np.zeros((1, )))
        v_l = CF.init_weights((D2, ))
        c_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        v_l.set_value(np.zeros((D2, )))
        c_l.set_value(np.zeros((1, )))
        pygx1 = CF.model00(F, w_l, b_l)
        pygx2 = CF.model00(E, v_l, c_l)
        pygx3 = CF.model3(Z, v_h1, v_o, c1, co, 0, 1)
        pygx = CF.model(X, w_h1, w_h2, w_o, b1, b2, bo, 0, 1)
        yhat1 = (pygx1 > 0.5)
        yhat2 = (pygx2 > 0.5)
        yhat3 = (pygx3 > 0.5)
        yhat = (pygx > 0.5)
        # soft gating function; the two linear gates are combined multiplicatively:
        f = lambda x, a: 1 / (1 + T.exp(-a * (x - 0.5)))
        pygx_final = ((1 - f(pygx1, a) * f(pygx2, a)) * pygx1 * pygx2
                      + f(pygx1, a) * f(pygx2, a) * (1 - f(pygx3, a)) * pygx3
                      + f(pygx1, a) * f(pygx2, a) * f(pygx3, a) * pygx)
        kappa1 = 1 / 20
        kappa2 = 1 / 10
        kappa3 = 1
        # reg = T.mean( f(pygx1,a)*f(pygx2,a) )
        reg = T.mean(kappa1
                     + kappa2 * f(pygx1, a) * f(pygx2, a)
                     + kappa3 * f(pygx1, a) * f(pygx2, a) * f(pygx3, a))
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final, Y)) + plambda * reg
        # params = [w_l, b_l, v_l, c_l]
        params = [w_l, b_l, v_l, c_l, w_h1, w_h2, w_o, b1, b2, bo, v_h1, v_o, c1, co]
        # params = [w_l, b_l, v_l, c_l, v_h1, v_o, c1, co]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo, b_l]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo]
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=0.1, epsilon=1e-06)
        updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.1 / 7, rho=0.9, epsilon=1e-06)
        train = theano.function(inputs=[X, Z, F, E, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        reg_value = theano.function(inputs=[Z, F, E], outputs=reg, allow_input_downcast=True)
        # reg_value = theano.function(inputs=[F, E], outputs=reg, allow_input_downcast=True)
        predict_first = theano.function(inputs=[F], outputs=yhat1, allow_input_downcast=True)
        predict_second = theano.function(inputs=[E], outputs=yhat2, allow_input_downcast=True)
        predict_third = theano.function(inputs=[Z], outputs=yhat3, allow_input_downcast=True)
        predict_final = theano.function(inputs=[X], outputs=yhat, allow_input_downcast=True)
        max_iter = 2000
        for j in range(max_iter):
            c = train(trX, trX, trX1, trX2, trY)
            r = reg_value(trX, trX1, trX2)
            # r = reg_value(trX1, trX2)
            print(c - plambda * r, plambda * r)
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX1)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test1 = np.where(teQ1 == 1)[0]
        nnz[i] = inds_test1.shape[0]
        inds_true = np.where(teY == 1)[0]
        int_result1 = np.intersect1d(inds_test1, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_test1.shape[0], inds_true.shape[0], int_result1.shape[0]))
        r1 = int_result1.shape[0] / inds_true.shape[0]
        p1 = int_result1.shape[0] / inds_test1.shape[0]
        a1 = np.mean(teY == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" % (r1, p1, a1))
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX2)
        end1 = time.clock()
        time1[i] += end1 - start1
        inds_test2 = np.where(teQ2 == 1)[0]
        nnz[i] = inds_test2.shape[0]
        int_result2 = np.intersect1d(inds_test2, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_test2.shape[0], inds_true.shape[0], int_result2.shape[0]))
        r2 = int_result2.shape[0] / inds_true.shape[0]
        p2 = int_result2.shape[0] / inds_test2.shape[0]
        a2 = np.mean(teY == teQ2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" % (r2, p2, a2))
        # only points passed by both cheap gates reach the third stage:
        inds_test = np.intersect1d(inds_test1, inds_test2)
        tps = np.intersect1d(inds_test, inds_true)
        print("intersects of first-second stages = %d, true positives = %d"
              % (inds_test.shape[0], tps.shape[0]))
        teXX = teX[inds_test, :]
        start1 = time.clock()
        for t in range(n_it):
            teQ3 = predict_third(teXX)
        end1 = time.clock()
        time1[i] += end1 - start1
        inds_test3 = np.where(teQ3 == 1)[0]
        teQ33 = np.zeros(teY.shape, dtype=int)
        teQ33.fill(0)
        teQ33[inds_test] = teQ3
        int_result3 = np.intersect1d(inds_test[inds_test3], inds_true)
        print("third stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_test3.shape[0], inds_true.shape[0], int_result3.shape[0]))
        r3 = int_result3.shape[0] / inds_true.shape[0]
        p3 = int_result3.shape[0] / inds_test3.shape[0]
        a3 = np.mean(teY == teQ33)
        print("third stage: recall = %f, precision = %f, accuracy = %f" % (r3, p3, a3))
        teX = teX[inds_test[inds_test3], :]
        start1 = time.clock()
        for t in range(n_it):
            teP = predict_final(teX)
        end1 = time.clock()
        time1[i] += end1 - start1
        teY3 = np.zeros(teY.shape, dtype=int)
        teY3.fill(0)
        teY3[inds_test[inds_test3]] = teP
        accuracy1[i] = np.mean(teY == teY3)
        inds_second = np.where(teY3 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("final stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r = int_result.shape[0] / inds_true.shape[0]
        p = int_result.shape[0] / inds_second.shape[0]
        print("final stage: recall = %f, precision = %f, accuracy = %f" % (r, p, accuracy1[i]))
        F1[i] = 2 * r * p / (r + p)
    return time1, accuracy1, F1, nnz
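
# ---------------------------------------------------------------------------
# tree_cascade_v1 gates the stages differently from the chain cascades: the
# two cheap linear stages act as parallel gates whose product decides whether
# the deeper models are consulted, and the regularizer is a kappa-weighted
# estimate of expected per-example cost.  The helper below is a NumPy
# rendering of just that combination with toy probabilities (illustration
# only):
# ---------------------------------------------------------------------------
def _tree_gate_demo(a=20.0):
    def gate(p):
        return 1.0 / (1.0 + np.exp(-a * (p - 0.5)))

    p1 = np.array([0.9, 0.3])     # linear gate on the first feature set
    p2 = np.array([0.8, 0.9])     # linear gate on the second feature set
    p3 = np.array([0.7, 0.6])     # one-hidden-layer NN
    p4 = np.array([0.95, 0.2])    # two-hidden-layer NN
    f1, f2, f3 = gate(p1), gate(p2), gate(p3)

    # Both cheap gates must fire before the 1-layer NN is used, and its gate
    # must fire in turn before the 2-layer NN gives the final answer.
    p_final = ((1 - f1 * f2) * p1 * p2
               + f1 * f2 * (1 - f3) * p3
               + f1 * f2 * f3 * p4)

    # Expected-cost regularizer with the per-stage weights used above.
    kappa1, kappa2, kappa3 = 1 / 20, 1 / 10, 1
    reg = np.mean(kappa1 + kappa2 * f1 * f2 + kappa3 * f1 * f2 * f3)
    return p_final, reg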
def cascade_rw(trX, trY, teX, teY, trX1, teX1, trX2, teX2, w_h1, w_o, b1, bo, plambda, a):
    """Three-stage soft cascade with two linear stages (trX1 and trX2 feature sets)
    followed by a given one-hidden-layer NN (trX features).

    Only the linear-stage parameters are trained.  Returns per-lambda prediction
    time, accuracy, F1, the forwarded counts, and the ROC AUC of the soft cascade
    probability.
    """
    lambda_vector = plambda
    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz = np.zeros((len(lambda_vector), 1))
    for i, plambda in enumerate(lambda_vector):
        (N, D1) = trX1.shape
        (N, D2) = trX2.shape
        X = T.fmatrix()
        F = T.fmatrix()
        E = T.fmatrix()
        Y = T.fvector()
        w_l = CF.init_weights((D1, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        w_l.set_value(np.zeros((D1, )))
        b_l.set_value(np.zeros((1, )))
        v_l = CF.init_weights((D2, ))
        c_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        v_l.set_value(np.zeros((D2, )))
        c_l.set_value(np.zeros((1, )))
        pygx1 = CF.model00(F, w_l, b_l)
        pygx2 = CF.model00(E, v_l, c_l)
        pygx = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)
        yhat1 = (pygx1 > 0.5)
        yhat2 = (pygx2 > 0.5)
        yhat = (pygx > 0.5)
        f = lambda x, a: 1 / (1 + T.exp(-a * (x - 0.5)))
        pygx_final = ((1 - f(pygx1, a)) * pygx1
                      + (1 - f(pygx2, a)) * f(pygx1, a) * pygx2
                      + f(pygx1, a) * f(pygx2, a) * pygx)
        kappa2 = 1 / 30
        kappa3 = 1
        # reg = T.mean(f(pygx1,a))
        reg = T.mean(kappa2 * f(pygx1, a) + kappa3 * f(pygx1, a) * f(pygx2, a))
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final, Y)) + plambda * reg
        params = [w_l, b_l, v_l, c_l]
        # params = [w_l, b_l, v_l, c_l, w_h1, w_o, b1, bo]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo, b_l]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo]
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1/2, epsilon=1e-06)
        updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.4, rho=0.9, epsilon=1e-06)
        train = theano.function(inputs=[X, F, E, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        reg_value = theano.function(inputs=[F, E], outputs=reg, allow_input_downcast=True)
        # reg_value = theano.function(inputs=[X, F, E], outputs=reg, allow_input_downcast=True)
        # reg_value = theano.function(inputs=[F], outputs=reg, allow_input_downcast=True)
        predict_first = theano.function(inputs=[F], outputs=yhat1, allow_input_downcast=True)
        predict_second = theano.function(inputs=[E], outputs=yhat2, allow_input_downcast=True)
        predict_final = theano.function(inputs=[X], outputs=yhat, allow_input_downcast=True)
        predict_prob = theano.function(inputs=[X, F, E], outputs=pygx_final, allow_input_downcast=True)
        max_iter = 1000
        for j in range(max_iter):
            c = train(trX, trX1, trX2, trY)
            r = reg_value(trX1, trX2)
            # r = reg_value(trX, trX1, trX2)
            # r = reg_value(trX1)
            print(c - plambda * r, plambda * r)
        # AUC of the soft cascade probability on the full test set:
        probs = predict_prob(teX, teX1, teX2)
        AUC = roc_auc_score(teY, probs)
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX1)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test1 = np.where(teQ1 == 1)[0]
        nnz[i] = inds_test1.shape[0]
        inds_true = np.where(teY == 1)[0]
        int_result1 = np.intersect1d(inds_test1, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_test1.shape[0], inds_true.shape[0], int_result1.shape[0]))
        teX2 = teX2[inds_test1, :]
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX2)
        end1 = time.clock()
        time1[i] += end1 - start1
        inds_test2 = np.where(teQ2 == 1)[0]
        nnz[i] = inds_test2.shape[0]
        int_result2 = np.intersect1d(inds_test1[inds_test2], inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_test2.shape[0], inds_true.shape[0], int_result2.shape[0]))
        teX = teX[inds_test1[inds_test2], :]
        start1 = time.clock()
        for t in range(n_it):
            teP = predict_final(teX)
        end1 = time.clock()
        time1[i] += end1 - start1
        teY3 = np.zeros(teY.shape, dtype=int)
        teY3.fill(0)
        teY3[inds_test1[inds_test2]] = teP
        accuracy1[i] = np.mean(teY == teY3)
        inds_second = np.where(teY3 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("final stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r = int_result.shape[0] / inds_true.shape[0]
        p = int_result.shape[0] / inds_second.shape[0]
        print("final stage: recall = %f, precision = %f, accuracy = %f" % (r, p, accuracy1[i]))
        F1[i] = 2 * r * p / (r + p)
    return time1, accuracy1, F1, nnz, AUC
def cascade_two_stage(trX1, trY1, teX1, teY1, trX2, teX2, w_h1, w_o, b1, bo, plambda, a):
    """Two-stage soft cascade: a logistic regression on the cheap features (trX2/teX2)
    gates a given one-hidden-layer NN on the rich features (trX1/teX1).

    Only the LR parameters are trained.  For each value in plambda, returns
    prediction time, accuracy, F1 and the number of points forwarded to the
    second stage.
    """
    lambda_vector = plambda
    # number of iterations for prediction:
    n_it = 10000
    # prediction time:
    time1 = np.zeros((len(lambda_vector), 1))
    # accuracy:
    accuracy1 = np.zeros((len(lambda_vector), 1))
    # F1 score:
    F1 = np.zeros((len(lambda_vector), 1))
    # number of non-zeros sent to the second stage:
    nnz = np.zeros((len(lambda_vector), 1))
    for i, plambda in enumerate(lambda_vector):
        # N: number of training data points, D: number of dimensions/features in the first-stage data
        (N, D) = trX2.shape
        # second-stage training data:
        X = T.fmatrix()
        # first-stage training data:
        F = T.fmatrix()
        # labels for training data:
        Y = T.fvector()
        # random initialization of LR parameters:
        w_l = CF.init_weights((D, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True, ))
        # zero initialization of LR parameters:
        w_l.set_value(np.zeros((D, )))
        b_l.set_value(np.zeros((1, )))
        # define LR model:
        pygx1 = CF.model00(F, w_l, b_l)
        # define 2LNN model:
        # pygx = CF.model(X, w_h1, w_h2, w_o, b1, b2, bo, 0, 1)
        # define 1LNN model:
        pygx = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)
        # hard-threshold cascade: thresholding of output probabilities
        yhat1 = (pygx1 > 0.5)  # output of first stage
        yhat = (pygx > 0.5)    # output of second stage
        # definition of the gating function:
        f = lambda x, a: 1 / (1 + T.exp(-a * (x - 0.5)))
        # output probability of the cascade:
        pygx_final = (1 - f(pygx1, a)) * pygx1 + f(pygx1, a) * pygx
        # regularization term:
        reg = T.mean(f(pygx1, a))
        # objective function:
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final, Y)) + plambda * reg
        # parameters of the optimization problem:
        params = [w_l, b_l]
        # params = [w_h1, w_o, w_l, b1, bo, b_l]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo, b_l]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo]
        # updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.0004, rho=0.9, epsilon=1e-06)
        updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)
        # theano function for training:
        train = theano.function(inputs=[X, F, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        reg_value = theano.function(inputs=[F], outputs=reg, allow_input_downcast=True)
        # theano function for prediction in the first stage:
        predict_first = theano.function(inputs=[F], outputs=yhat1, allow_input_downcast=True)
        # theano function for prediction in the second stage:
        predict_second = theano.function(inputs=[X], outputs=yhat, allow_input_downcast=True)
        # number of steps in SGD:
        max_iter = 5000
        # iterations for SGD/training:
        for j in range(max_iter):
            c = train(trX1, trX2, trY1)
            r = reg_value(trX2)
            print(c, c - plambda * r, plambda * r)
            # cost = train(trX1, trY1)
        # prediction for the first stage:
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX2)
            # teQ1 = teX1.dot(w_l.get_value()) + b_check >= 0
            # teQ1 = np.dot(teX2, w_l.get_value()) + b_l.get_value() >= 0
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz[i] = inds_test.shape[0]
        # indices of true positives:
        inds_true = np.where(teY1 == 1)[0]
        # intersection of true positives and first-stage predictions:
        int_result = np.intersect1d(inds_test, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_test.shape[0], inds_true.shape[0], int_result.shape[0]))
        # recall of the first stage:
        r1 = int_result.shape[0] / inds_true.shape[0]
        # precision of the first stage:
        p1 = int_result.shape[0] / inds_test.shape[0]
        # accuracy of the first stage:
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" % (r1, p1, a1))
        # only send cases predicted positive by the first stage to the second stage:
        teX1 = teX1[inds_test, :]
        # prediction for the second stage:
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX1)
        end1 = time.clock()
        time1[i] += end1 - start1
        # output labels of the cascade:
        teY3 = np.zeros(teY1.shape, dtype=int)
        teY3.fill(0)
        teY3[inds_test] = teQ2
        # accuracy of the cascade:
        accuracy1[i] = np.mean(teY1 == teY3)
        inds_second = np.where(teY3 == 1)[0]
        int_result2 = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d"
              % (inds_second.shape[0], inds_true.shape[0], int_result2.shape[0]))
        # recall of the cascade:
        r2 = int_result2.shape[0] / inds_true.shape[0]
        # precision of the cascade:
        p2 = int_result2.shape[0] / inds_second.shape[0]
        print("second stage: recall = %f, precision = %f, accuracy = %f" % (r2, p2, accuracy1[i]))
        # F1 score of the cascade:
        F1[i] = 2 * r2 * p2 / (r2 + p2)
    return time1, accuracy1, F1, nnz