Example #1
0
def test8():
	"""Run kernel AS and naive Shari AS on the data stored in 't8data.npy'.

	Loads ``X, Y`` from ``t8data.npy``, starts both searches from the index of
	the first nonzero entry of ``Y``, prints the wall-clock time of each run and
	the last entry of each returned ``h`` sequence against ``num_eval``, then
	opens an interactive IPython shell with all locals in scope.
	"""
	# Parameters of the synthetic-data generators below. Only the commented-out
	# createFakeData* / eigenmap calls refer to them.
	n = 500
	r = 50
	nt = 100
	#rcross = 50
	d = 50
	hubs = 1

	verbose = False

	num_eval = 100

	#X, Y = createFakeData(n, r, nt, rcross)
	#X, Y = createFakeData2(n, r, nt, hubs)
	X, Y = np.load('t8data.npy')

	# Seed both methods at the first nonzero label.
	init_pt = np.nonzero(Y)[0][0]

	ker = True

	# Mean of Y, handed to both methods as ``pi``. The ``* 1.0`` forces true
	# division: under Python 2, ``sum(Y)/len(Y)`` with integer labels truncates
	# to 0.
	pi = sum(Y) * 1.0 / len(Y)
	print("Constructing the similarity matrix:")
	A = X.T.dot(X)
	t1 = time.time()
	if ker:
		print("Performing Kernel AS")
		f1, h1, s1, fs1 = AS.kernel_AS(X, Y, pi=pi, num_eval=num_eval, init_pt=init_pt, verbose=verbose, all_fs=True)
	t2 = time.time()
	#print "Performing Eigen decmop"
	#Xe, b, w, deg = eigenmap(A, d)
	#t3 = time.time()
	if ker:
		print("Performing Naive Shari AS")
		f2, h2, s2, fs2 = AS.shari_activesearch_probs_naive(A, labels=Y, pi=pi, w0=None, eta=None, num_eval=num_eval, init_pt=init_pt, verbose=verbose, all_fs=True)
		#f2,h2,s2,fs2 = AS.lreg_AS (Xe, deg, d, alpha=0.0, labels=Y, options={'num_eval':num_eval,'pi':pi,'n_conncomp':b}, verbose=verbose)
	t4 = time.time()

	# "%s" formatting gives the same text as the Python 2 ``print a, b`` form
	# and also parses under Python 3.
	print("Time taken for kernel: %s" % (t2 - t1))
	#print "Time taken for eigenmap + computing X.T*X:", t3-t2
	print("Time taken for Shari's method (naive): %s" % (t4 - t2))
	if ker:
		print("h_kernel: %i/%i" % (h1[-1], num_eval))
		# NOTE(review): h2 comes from the naive Shari run above, not from
		# lreg_AS; the "h_lreg" label is kept unchanged for output compatibility.
		print("h_lreg: %i/%i" % (h2[-1], num_eval))

	# Drop into an interactive shell to inspect the results.
	import IPython
	IPython.embed()
Example #2
0
def test9():
	"""Time the naive Shari active search on a dense sub-block of 'Xfull1.npz'.

	Loads a sparse matrix with ``load_sparse_csr``, densifies its first 5000
	columns, drops all-zero rows and columns, keeps at most the first 2000 rows
	(pruning zeros again), labels the first ``nt`` columns as positives and runs
	``AS.shari_activesearch_probs_naive`` on ``A = X.T.dot(X)``. Timings are
	printed and two IPython shells are opened: one before the runs and one at
	the end.
	"""
	verbose = True

	# Paths to the raw data files; only the commented-out loader below refers
	# to them.
	datadir = osp.join(os.getenv('HOME'), 'Research/Data/ActiveSearch/sibi_matrices')
	tsfile = osp.join(datadir, 'timestamps.csv')
	tffile = osp.join(datadir, 'tfidf_pretranspose.txt')
	contactsfile = osp.join(datadir, 'email_person_bitarray.txt')

	#ts_data = ef.load_timestamps (tsfile)
	Xfull = load_sparse_csr('Xfull1.npz')

	n = 5000
	r = 2000
	# getting rid of features which are zero for all these elements
	X = np.array((Xfull[:,:n]).todense())
	# X is a plain ndarray here, so both axis sums are 1-D and np.nonzero
	# returns a 1-tuple: the indices are always at position [0]. Indexing the
	# column case with [1] raises IndexError.
	X = X[np.nonzero(X.sum(axis=1))[0],:]
	X = X[:,np.nonzero(X.sum(axis=0))[0]]
	# Truncate to the first r rows, then prune again because the truncation can
	# leave columns (and hence rows) that are entirely zero.
	X = X[:r,:]
	X = X[np.nonzero(X.sum(axis=1))[0],:]
	X = X[:,np.nonzero(X.sum(axis=0))[0]]

	# From here on r and n are the dimensions of the pruned matrix.
	r, n = X.shape
	d = 20
	nt = 10#int(0.1*n)
	num_eval = 15#nt*2
	# The first nt columns are labelled 1, the rest 0.
	Y = np.array([1]*nt + [0]*(n-nt), dtype=int)

	pi = nt*1.0/n
	init_pt = 100

	A = X.T.dot(X)
	# Interactive checkpoint before the timed section; exit the shell to go on.
	import IPython
	IPython.embed()

	t1 = time.time()
	print("Kernel method")
	#f1,h1,s1,fs1,dt = AS.kernel_AS (X, Y, pi=pi, num_eval=num_eval, init_pt=init_pt, verbose=verbose,all_fs=True,tinv=True)
	t2 = time.time()
	print("Eigen map")
	#Xe, b, w, deg = eigenmap(A, d)
	#np.save('eigenstuff',[Xe, b, w, deg])
	Xe, b, w, deg  = np.load('eigenstuff.npy')
	t3 = time.time()
	print("Shari method")
	#f2,h2,s2 = AS.lreg_AS (Xe, deg, d, alpha=0.0, labels=Y, options={'num_eval':num_eval,'pi':pi,'n_conncomp':b,'init_pt':init_pt}, verbose=verbose)
	t4 = time.time()
	f3, h3, s3, fs3 = AS.shari_activesearch_probs_naive(A, labels=Y, pi=pi, w0=None, eta=None, num_eval=num_eval, init_pt=init_pt, verbose=verbose, all_fs=True)
	# Timestamp after the only method that actually runs, so it gets timed too.
	t5 = time.time()

	# The kernel and lreg calls are commented out above, so their intervals
	# measure nothing; "eigen decomp" times the np.load of the cached result.
	# "%s" formatting gives the same text as the Python 2 ``print a, b`` form.
	print("Time taken for kernel: %s" % (t2 - t1))
	#print "Time taken for inverse:", dt
	print("Time taken for eigen decomp: %s" % (t3 - t2))
	print("Time taken for lreg: %s" % (t4 - t3))
	print("Time taken for Shari's method (naive): %s" % (t5 - t4))


	#f1 = np.squeeze(f1)

	# Drop into an interactive shell to inspect the results.
	import IPython
	IPython.embed()