def test1_leverage():
	"""
	Tests for leverage_make: it should return the hat-matrix diagonal
	(the leverages) and the rank of the design matrix.
	"""
	design = np.ones((10, 3))
	design[:, 0] = np.array([-1, -.5, 0,   0,   0,  0,  0, 0, .5, 1])
	design[:, 1] = np.array([-1,   0, 0,   0, -.5,  0, .5, 0,  0, 1])
	design[:, 2] = np.array([-1, -.5, 0, -.5,   0, .5,  0, 0, .5, 1])

	hat_diag, rank = leverage_make(design)

	# leverage_i = diag( X (X'X)^+ X' )_i, computed directly for comparison
	expected = design.dot(npl.pinv(design.T.dot(design))).dot(design.T).diagonal()
	assert_allclose(expected, hat_diag)

	assert rank == 3

	# A smaller sanity check of the same function:

	small = np.ones((3, 3))
	small[:, 0] = np.array([-1, 0, 1])
	small[:, 1] = np.array([-1, 1, 3])
	small[:, 2] = np.array([0, 0, 0])

	# Mahalanobis distance relates to leverage via
	#   Mahalanobis = (N - 1) * (leverage - 1/N),
	# where N is the total number of observations.
	#
	# The second row sits at the center of the distribution, so its
	# Mahalanobis distance is zero:
	#   0 = (N - 1) * (l_2 - 1/N)
	#   0 = 2 * (l_2 - 1/3)
	#   l_2 = 1/3
	small_result = leverage_make(small)

	assert np.round(small_result[0][1], 10) == np.round(1 / 3, 10)

	# Note: the third column is all zeros, so `small` only has rank 2.
	assert small_result[1] == 2
# ---- Ejemplo n.º 2 (a second, separate pasted example begins here) ----
# Make the project's helper modules importable.
# NOTE(review): function_location is presumably a directory path defined
# earlier in the file -- confirm.
sys.path.append(function_location)

from visuals_functions import three_d_scatter_rotation_gen,three_d_cluster_rotation_gen
from outlier_and_normalization_functions import leverage_make

# Kmean clustering with 2 clusters


##################
#### Leverage ####
##################

# Load the feature matrix and the matching observation names.
# NOTE(review): data_created is presumably a directory path string
# defined earlier in the file -- confirm.
X_full     = np.load(data_created+"X_full.npy")
names_full = np.load(data_created+"names_full.npy")

# Peel off the most extreme observations one pass at a time: compute
# leverages, drop the row(s) attaining the maximum leverage, repeat.
# (If several rows tie for the maximum, all of them are dropped.)
leverage,X_rank = leverage_make(X_full)
X_minus1        = X_full[leverage!=np.max(leverage),:]

leverage,X_rank = leverage_make(X_minus1)
X_minus2        = X_minus1[leverage!=np.max(leverage),:]

# All that stuff

# hierarchical clustering

# http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage
# http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.cluster.hierarchy.single.html


import scipy
import scipy.cluster
	# NOTE(review): fragment -- the enclosing definition/loop header (the
	# one providing num_data, grey_option, data, data_names, grey_output,
	# and kk) is not visible in this chunk; code left untouched.
	# If every column is selected, tag output filenames with "w_grey".
	if len(grey_option) == X_full.shape[-1]:
		grey_output, kk = "w_grey",0


	# Single-linkage hierarchical clustering on the selected columns.
	X                = data[num_data][:,grey_option]
	distance         = scipy.spatial.distance.pdist(X)
	single_hierarchy = scipy.cluster.hierarchy.single(distance)


	image_extension    = "single"+data_names[num_data]+"_"+grey_output+".png"

	# initial plot for aljustments to distance given to making the classes

	if data_names[num_data].rfind("full") !=(-1):

		# For the "full" data set, drop the two highest-leverage rows
		# before clustering (ties with those top-2 values are also
		# dropped, since membership is tested by value).
		leverage, _  =leverage_make(X)
		keepers      = np.array([True if x not in sorted(leverage)[-2:] else False 
							for x in leverage])
		X_new        = X[keepers,:]


		distance_new = scipy.spatial.distance.pdist(X_new)
		single_hierarchy_new = scipy.cluster.hierarchy.single(distance_new)

		# Plot merge distances (normalized to [0, 1]) with two horizontal
		# reference lines at 0.4 and 0.05 for choosing a cut threshold.
		# NOTE(review): the 159 x-limit presumably matches the number of
		# merges for this data set -- confirm.
		plt.figure()
		plt.plot(single_hierarchy_new[:,2]/np.max(single_hierarchy_new[:,2]))
		plt.plot([0,159],[.4,.4])
		plt.text(159/2,.4+.025,str(.4))
		plt.plot([0,159],[.05,.05])
		plt.text(159/2,.05+.025,str(.05))
		# NOTE(review): the following line is a dangling fragment -- the
		# start of the list it closes was lost when the file was chunked.
						"bad_SALT2","clare_mega_bad","clare_probably_bad"]


# visualizing bad ones
coloring_bad  = np.zeros(len(names_full))
names_in_mine = {}
for i,lists in enumerate(special_look):

	for element in lists:
		if element in names_full:
			names_in_mine[element]=special_look_names[i]
			coloring_bad[names_full==element]=(i+1)


coloring_standardized = coloring_bad.copy()
for i,cluster in enumerate(set(coloring_bad)):
	coloring_standardized[coloring_bad==cluster]=i

three_d_plot_funct(X_full,coloring_standardized,save=False)


# my suggested bad SN
# The observation(s) with the largest leverage are the most suspect.
leverage,rank=leverage_make(X_full)
bad=names_full[np.max(leverage)==leverage]


# ...and the runner-up.  sorted() is ascending, so the second-LARGEST
# leverage is at index -2.  (The original indexed [1], which is the
# second-smallest leverage -- almost certainly not what was intended
# when looking for bad, i.e. high-leverage, SNe.)
second = names_full[sorted(leverage)[-2]==leverage]