Example #1
0
# Draw a seed in [0, seed0) for the external training binary.
seed = np.random.randint(seed0)

# Build the training command line. With load == 0 the 'random' keyword and the
# output directory are passed; otherwise the 'load' keyword is passed together
# with the output directory and the '<dirpath>/001' prefix.
if load != 0:
    cmdtxt = '%s %d train %s %s %s load %s %s/001' % (
        Code, seed, trfile, trlblfile, settingfile, dirpath, dirpath)
else:
    cmdtxt = '%s %d train %s %s %s random %s' % (
        Code, seed, trfile, trlblfile, settingfile, dirpath)
os.system(cmdtxt)  # + ' > /dev/null')

# transductive learning: rows of trlbl whose first column is -1 are scored
# using the matching rows of '<dirpath>/final.b' against the matching rows of
# '<Datapath>/train-label.dat'. Both AUC values stay 0 when no such row exists.
unlbld = np.where(trlbl[:, 0] == -1)[0]
roc_trans = 0
roc_macro_trans = 0
if len(unlbld) > 0:
    b = np.loadtxt('%s/final.b' % dirpath)[unlbld, :]
    gtlbl_unlbld = np.loadtxt('%s/train-label.dat' % Datapath)[unlbld, :]
    roc_trans, roc_macro_trans = myAUC.compute_auc(b, gtlbl_unlbld)

# Training time is read from the last row, third column of the likelihood log.
trtime = np.loadtxt('%s/likelihood.dat' % dirpath)[-1, 2]

# test
# Write the settings file for the test phase. The context manager guarantees
# the handle is flushed and closed even if formatting or writing raises.
settingfile = '%s/settings.txt' % dirpath
with open(settingfile, 'w') as fp:
    fp.write('M %d\nC %d\nD %d\nN %d\nT %d\nalpha %f\nconverged %f' %
             (M, C, Dt, N, T, alpha, converged))
# Fresh seed in [0, seed0) for the test invocation.
seed = np.random.randint(seed0)

cmdtxt = '%s %d test %s %s %s/final %s' % (Code, seed, tfile, settingfile,
Example #2
0
					temp = np.sum(np.exp(wTx - maxval))
					logpy -= maxval + np.log(np.exp(-maxval)+temp)
					py[s,c] = temp/(np.exp(-maxval)+temp)
				else:
					temp = np.sum(np.exp(wTx))
					logpy -= np.log(1+temp)
					py[s,c] = temp/(1.0+temp)

			b[d,c] = 1-np.exp(logpy)
		for s in range(Sd):
			b_sent[scnt,:] = py[s,:]
			scnt += 1
		d += 1
	fp.close()

	# AUC of b against tlbl, and of b_sent against tlbl_sent.
	roc, roc_macro = myAUC.compute_auc(b, tlbl)
	roc_sent, roc_sent_macro = myAUC.compute_auc(b_sent, tlbl_sent)

	# ThFprAUC for documents with no labels: on rows of tlbl that sum to zero,
	# every score above the threshold counts as a false positive. The
	# false-positive rate is evaluated on 50 thresholds in [0, 1] and the area
	# under that curve is kept; it stays 0 when there is no such row.
	nolbld = np.where(np.sum(tlbl, 1) == 0)[0]
	fprAUC = 0
	if len(nolbld) > 0:
		TH = np.linspace(0, 1, 50)
		fpr = np.zeros(len(TH))
		for t, th in enumerate(TH):
			pred = np.round(b[nolbld] > th)
			fp = np.sum(pred == 1)
			tn = np.sum((1 - pred) == 1)
			fpr[t] = fp / float(fp + tn)
		fprAUC = metrics.auc(TH, fpr)
Example #3
0
# Fixed run settings; each value is written under its own key into the test
# settings file created later in this script.
batchsize = 500
lag = 10000  # 50
save_lag = 5000
psi1 = 1.0
psi2 = 1.0
rho0 = 0.1
BurnIn = 50
s = 10

# Training time is read from the last row, fourth column of the likelihood log.
trtime = np.loadtxt('%s/likelihood.dat' % dirpath)[-1, 3]

# transductive learning: rows of trlbl whose first column is -1 are scored
# using the matching rows of '<dirpath>/001.b' against the matching rows of
# '<Datapath>/train-label.dat'. Both AUC values stay 0 when no such row exists.
unlbld = np.where(trlbl[:, 0] == -1)[0]
roc_trans = 0
roc_macro_trans = 0
if len(unlbld) > 0:
    b = np.loadtxt('%s/001.b' % dirpath)[unlbld, :]
    gtlbl_unlbld = np.loadtxt('%s/train-label.dat' % Datapath)[unlbld, :]
    roc_trans, roc_macro_trans = myAUC.compute_auc(b, gtlbl_unlbld)

# test
# Write the test-phase settings file, one "key value(s)" entry per line. The
# context manager guarantees the handle is flushed and closed even if one of
# the writes (or its string formatting) raises.
tsettingfile = '%s/test_settings.txt' % dirpath
with open(tsettingfile, 'w') as fp:
    fp.write('M %d\nC %d\nD %d\nN %d\nT %d\nL %d\n' % (M, C, Dt, N, T, L))
    fp.write('alpha %f\nnu %f\nkappa %f\ntau %f\nbatchsize %d\n' %
             (alpha, nu, kappa, tau, batchsize))
    fp.write('lag %d\nsave_lag %d\npsi %f %f\nrho0 %f\nburnin %d\ns %f\n' %
             (lag, save_lag, psi1, psi2, rho0, BurnIn, s))

# Fresh seed in [0, seed0) for the next external invocation.
seed = np.random.randint(seed0)
Example #4
0
    global test_sent_pred
    global svmC
    global c0

    os.system('tar -zxf dir/tmp_%d_%d.tar.gz ' % (c, c0))
    clf = joblib.load('dir/tmp_%d_%d/model.pkl' % (c, c0))

    (test_pred[:,
               c], test_sent_pred[:,
                                  c], bags) = testSVM(N, 'dir/tfile_%d' % c,
                                                      clf)


# Run test_model_class once per class index 0..C-1 through the pool.
pool.map(test_model_class, list(range(C)))

# AUC of test_pred against tlbl, then of test_sent_pred against gt_sent.
roc, roc_macro = myAUC.compute_auc(test_pred, tlbl)

roc_sent, roc_sent_macro = myAUC.compute_auc(test_sent_pred, gt_sent)

# ThFprAUC for documents with no labels
nolbld = np.where(np.sum(tlbl, 1) == 0)[0]
if len(nolbld) > 0:
    TH = np.linspace(0, 1, 50)
    fpr = np.zeros(len(TH))
    for t, th in enumerate(TH):
        pred = np.round(test_pred[nolbld] > th)
        tn = np.sum((1 - pred) == 1)
        fp = np.sum(pred == 1)
        fpr[t] = fp / float(fp + tn)
    fprAUC = metrics.auc(TH, fpr)
else:
Example #5
0
# Resume support: reload the validation-score grid saved by an earlier run, or
# start from an all-zero grid with one row per entry of clist and one column
# per entry of gammalist.
# NOTE(review): np.loadtxt returns a 1-D array for a single-row or
# single-column file, which would break the 2-D indexing below -- confirm that
# both lists always have more than one entry.
if os.path.isfile('dir/vccr.txt'):
    vccr = np.loadtxt('dir/vccr.txt')
else:
    vccr = np.zeros((len(clist), len(gammalist)))

# Grid search over every (svmGamma, svmC) pair.
# NOTE(review): g0/svmGamma, c0/svmC and valid_pred are presumably read or
# filled by train_model_class as module globals -- confirm before renaming.
for g0, svmGamma in enumerate(gammalist):
    for c0, svmC in enumerate(clist):

        # A non-zero cell was already scored (by this or a previous run).
        if vccr[c0, g0] != 0:
            continue

        valid_pred = np.zeros(vlbl.shape)
        # Only class indices whose trlbl column sums to more than zero are run.
        runC = [c for c in range(C) if np.sum(trlbl[:, c]) > 0]

        pool.map(train_model_class, runC)
        (roc, roc_macro) = myAUC.compute_auc(valid_pred, vlbl)
        # The small Gaussian perturbation breaks ties between equal scores (and
        # makes an exact 0, which would be re-run on resume, very unlikely).
        vccr[c0, g0] = roc + np.random.randn() * 1e-5  # tie breaker

        #vccr[c0,g0] = np.mean((vlbl==valid_pred)**2)
        print('>>>>', c0, g0, svmC, vccr[c0, g0])
        # Persist after every cell so an interrupted search can be resumed.
        np.savetxt('dir/vccr.txt', vccr, '%f')

# Select the best-scoring cell: the row index picks svmC from clist, the column
# index picks svmGamma from gammalist.
ind = np.unravel_index(vccr.argmax(), vccr.shape)
c0 = ind[0]
svmC = clist[c0]
g0 = ind[1]
svmGamma = gammalist[g0]

# Output array for the test phase, same shape as tlbl.
test_pred = np.zeros(tlbl.shape)

Example #6
0
        logpy = 0.0
        for s, sent in enumerate(sents[1:]):
            temp = hTs[c]
            temp += np.sum([
                (1 - 2 * model_h[int(n), c]) * model_s[int(n), c]**2
                for n in sent.split()
            ])
            #for n in sent.split():
            #	temp += (1-2*model_h[c,int(n)])*model_s[c,int(n)]**2
            py[s, c] = np.exp(-temp)

        b[d, c] = np.max(py[:, c])
    d += 1
fp.close()

# AUC of b against tlbl.
roc, roc_macro = myAUC.compute_auc(b, tlbl)

# ThFprAUC for documents with no labels: on rows of tlbl that sum to zero,
# every score above the threshold counts as a false positive. The
# false-positive rate is evaluated on 50 thresholds in [0, 1] and the area
# under that curve is kept; it stays 0 when there is no such row.
nolbld = np.where(np.sum(tlbl, 1) == 0)[0]
fprAUC = 0
if len(nolbld) > 0:
    TH = np.linspace(0, 1, 50)
    fpr = np.zeros(len(TH))
    for t, th in enumerate(TH):
        pred = np.round(b[nolbld] > th)
        fp = np.sum(pred == 1)
        tn = np.sum((1 - pred) == 1)
        fpr[t] = fp / float(fp + tn)
    fprAUC = metrics.auc(TH, fpr)
Example #7
0
	        for i, bag in enumerate(vbags.keys()):
	            for j in range(M[c]):
	        
	                H[i, j] = Hausdorff_dist(vX[vbags[bag], :], centers[c][j])

	        vphi[c] = np.exp( -0.5 * (H/delta)**2)
	        #pdb.set_trace()
	    # Run valid_compute_phi once per class index 0..C-1 through the pool.
	    pool.map(valid_compute_phi, [c for c in range(C)])

	    # Per-class validation scores: column c is vphi[c] multiplied by W[c].
	    valid_pred = np.zeros(vlbl.shape)
	    for c in range(C):
	        valid_pred[:, c] = np.dot(vphi[c], W[c])

	    (roc, roc_macro) = myAUC.compute_auc(EnMIMLNN.normalize(valid_pred, C), vlbl, npts=100)

	    # Record the score for this (mu, alpha) cell of the grid.
	    vccr[mu_ind, alpha_ind] = roc

	    print(alpha, mu, roc)
	    # Persist the score grid and this setting's W, delta and centers.
	    # Each pickle is written inside a context manager so the file is
	    # flushed and closed right away instead of being left to the garbage
	    # collector.
	    os.system('mkdir -p dir/tmp%d_%d' %(alpha_ind, mu_ind))
	    np.savetxt('dir/vccr', vccr, '%f')
	    with open('dir/tmp%d_%d/W' %(alpha_ind, mu_ind), 'wb') as fh:
	        pickle.dump(W, fh)
	    with open('dir/tmp%d_%d/delta' %(alpha_ind, mu_ind), 'wb') as fh:
	        pickle.dump(delta, fh)
	    with open('dir/tmp%d_%d/center' %(alpha_ind, mu_ind),'wb') as fh:
	        pickle.dump(centers, fh)
		    

# find best hyper-params: vccr is indexed as [mu_ind, alpha_ind], so the
# position of its maximum unpacks in that order.
ind = np.unravel_index(vccr.argmax(), vccr.shape)
mu_ind, alpha_ind = ind
Example #8
0
svmC = bestC


def test_model_class(c):
    """Load the stored model for class index ``c`` and score the test file.

    Extracts ``dir/tmp_<c>_<c0>.tar.gz`` with ``tar``, loads ``model.pkl``
    from the extracted directory via joblib, runs ``testSVM`` on
    ``dir/tfile_<c>`` and stores the first of its three return values in
    column ``c`` of the global ``test_pred``. The other two return values
    are discarded.
    """
    global test_pred
    global svmC
    global c0
    model_key = (c, c0)
    os.system('tar -zxf dir/tmp_%d_%d.tar.gz ' % model_key)
    clf = joblib.load('dir/tmp_%d_%d/model.pkl' % model_key)

    column, _second, _bags = testSVM(N, 'dir/tfile_%d' % c, clf)
    test_pred[:, c] = column


# Run test_model_class once per class index 0..C-1 through the pool.
pool.map(test_model_class, list(range(C)))

# AUC of test_pred against tlbl.
roc, roc_macro = myAUC.compute_auc(test_pred, tlbl)

# ThFprAUC for documents with no labels: on rows of tlbl that sum to zero,
# every score above the threshold counts as a false positive. The
# false-positive rate is evaluated on 50 thresholds in [0, 1] and the area
# under that curve is kept; it stays 0 when there is no such row.
nolbld = np.where(np.sum(tlbl, 1) == 0)[0]
fprAUC = 0
if len(nolbld) > 0:
    TH = np.linspace(0, 1, 50)
    fpr = np.zeros(len(TH))
    for t, th in enumerate(TH):
        pred = np.round(test_pred[nolbld] > th)
        fp = np.sum(pred == 1)
        tn = np.sum((1 - pred) == 1)
        fpr[t] = fp / float(fp + tn)
    fprAUC = metrics.auc(TH, fpr)