Example #1
0
def divide_score(class_1_data, class_1_label, class_2_data, class_2_label, K=None, score=mean_center_distance_score):
    """
    Rate a two-way partition as the distance between the two group centroids
    divided by the sum of the two groups' internal scores.

    A group's internal score is only evaluated when its number of distinct
    labels lies strictly between 1 and ``K - 1``; otherwise it counts as 0.
    The ratio itself is only formed when group 1 passes that check; in every
    other case the result is 0.

    :param class_1_data: samples of the first group (averaged along axis 0)
    :param class_1_label: labels of the first group
    :param class_2_data: samples of the second group (averaged along axis 0)
    :param class_2_label: labels of the second group
    :param K: total number of classes; when None, the sum of the distinct
        label counts of both groups is used
    :param score: callable ``score(data, label)`` giving a group's internal score
    :return: centroid distance / (score_1 + score_2), or 0 when group 1 fails
        the label-count check
    """
    distinct_1 = len(np.unique(class_1_label))
    distinct_2 = len(np.unique(class_2_label))
    if K is None:
        K = distinct_1 + distinct_2

    group_1_ok = 1 < distinct_1 < K - 1
    group_2_ok = 1 < distinct_2 < K - 1

    inner_1 = score(class_1_data, class_1_label) if group_1_ok else 0
    inner_2 = score(class_2_data, class_2_label) if group_2_ok else 0

    # The centroid gap is always computed, even when the final answer is 0.
    centroid_gap = Distance.euclidean_distance(
        np.average(class_1_data, axis=0),
        np.average(class_2_data, axis=0),
    )

    # Only group 1's check gates the ratio.
    if not group_1_ok:
        return 0
    return centroid_gap / (inner_1 + inner_2)
Example #2
0
def mean_distance_score(class_data, class_label):
    """
    Average Euclidean distance from each sample to the centroid of the samples.

    The centroid is the axis-0 mean of ``class_data``. Only the length of
    ``class_label`` is used: it fixes how many leading samples are visited and
    is the divisor of the average.

    :param class_data: samples, indexable by position
    :param class_label: labels of the samples (only its length is read)
    :return: summed sample-to-centroid distance divided by ``len(class_label)``
    """
    sample_count = len(class_label)
    centroid = np.mean(class_data, axis=0)
    total = sum(
        Distance.euclidean_distance(centroid, class_data[idx])
        for idx in range(sample_count)
    )
    return total / sample_count
Example #3
0
def max_distance_score(class_data, class_label):
    """
    Largest Euclidean distance from any sample to the centroid of the samples.

    The centroid is the axis-0 mean of ``class_data``. Only the length of
    ``class_label`` is used, to decide how many leading samples are visited.

    :param class_data: samples, indexable by position
    :param class_label: labels of the samples (only its length is read)
    :return: the greatest sample-to-centroid distance, or ``-np.inf`` when no
        sample is visited
    """
    centroid = np.mean(class_data, axis=0)
    farthest = -np.inf
    for idx in range(len(class_label)):
        candidate = Distance.euclidean_distance(centroid, class_data[idx])
        # Explicit comparison (rather than max()) keeps the running value
        # whenever the comparison is false.
        farthest = candidate if candidate > farthest else farthest
    return farthest
Example #4
0
def agg_score(class_1_data, class_1_label, class_2_data, class_2_label, score=mean_distance_score):
    """
    Rate a candidate agglomeration of two groups as twice the distance between
    their centroids divided by the sum of their internal scores.

    :param class_1_data: samples of the first group (averaged along axis 0)
    :param class_1_label: labels of the first group
    :param class_2_data: samples of the second group (averaged along axis 0)
    :param class_2_label: labels of the second group
    :param score: callable ``score(data, label)`` giving a group's internal score
    :return: ``2 * centroid_distance / (score_1 + score_2)``
    """
    spread_1 = score(class_1_data, class_1_label)
    spread_2 = score(class_2_data, class_2_label)
    centroid_gap = Distance.euclidean_distance(
        np.mean(class_1_data, axis=0),
        np.mean(class_2_data, axis=0),
    )
    combined_spread = spread_1 + spread_2
    return 2 * centroid_gap / combined_spread
Example #5
0
def min_center_distance_score(class_data, class_label):
    """
    Smallest Euclidean distance between any two class centroids.

    One centroid is built per distinct label as the axis-0 average of the
    samples carrying that label; every unordered pair of centroids is then
    compared.

    :param class_data: samples, indexable by position in step with ``class_label``
    :param class_label: label of each sample
    :return: minimum pairwise centroid distance, or ``np.inf`` when there are
        fewer than two distinct labels
    """
    class_center = [
        np.average(
            [class_data[i] for i in range(len(class_label)) if class_label[i] == label],
            axis=0,
        )
        for label in np.unique(class_label)
    ]

    min_distance = np.inf
    # Both indices range over the centroids, not over the samples; the outer
    # loop previously ran once per sample, which only added empty iterations.
    center_count = len(class_center)
    for i in range(center_count):
        for j in range(i + 1, center_count):
            distance = Distance.euclidean_distance(class_center[i], class_center[j])
            if distance < min_distance:
                min_distance = distance
    return min_distance
Example #6
0
def AIC(X, Y, dtype="default"):
    """
    Return the natural log of a penalised within-group distance total.

    Samples in ``X`` are grouped by their label in ``Y``. Each sample's
    distance to its group's average (as returned by ``getAvgEx``) is summed,
    ``4 * K * D`` is added (K = number of distinct labels, D = ``len(X[0])``),
    and the log of that total is returned.

    :param X: samples, indexable by position in step with ``Y``
    :param Y: label of each sample
    :param dtype: ``"default"`` selects ``Distance(a, b).euclidean_distance()``;
        any other string is evaluated with ``eval`` and the result is called
        as ``metric(sample, group_average)``
    :return: ``math.log(total_distance + 4 * K * D)``
    """
    # Resolve the metric once instead of re-evaluating it for every sample.
    if dtype == "default":
        def metric(sample, group_mean):
            return Distance(sample, group_mean).euclidean_distance()
    else:
        # SECURITY: eval() executes arbitrary code. Only pass trusted,
        # hard-coded metric names here, never user-supplied input.
        metric = eval(dtype)

    labels = set(Y)
    total_distance = 0
    for label in labels:
        members = [X[idx] for idx in range(len(Y)) if Y[idx] == label]
        group_mean = getAvgEx(members)
        for sample in members:
            total_distance += metric(sample, group_mean)

    # Penalty term grows with the number of groups and the sample dimension.
    penalty = 4 * len(labels) * len(X[0])
    return math.log(total_distance + penalty)
Example #7
0
def mean_center_distance_score(class_data, class_label):
    """
    Mean Euclidean distance over all unordered pairs of class centroids.

    One centroid is built per distinct label as the axis-0 average of the
    samples carrying that label. The pairwise distances are summed and
    normalised as ``2 * sum / (n**2 - n)`` with ``n`` the number of distinct
    labels; a zero denominator is replaced by 1, so fewer than two labels
    yields 0.

    :param class_data: samples, indexable by position in step with ``class_label``
    :param class_label: label of each sample
    :return: the mean pairwise centroid distance
    """
    centroids = []
    for label in np.unique(class_label):
        members = [
            class_data[idx]
            for idx in range(len(class_label))
            if class_label[idx] == label
        ]
        centroids.append(np.average(members, axis=0))

    pair_total = sum(
        Distance.euclidean_distance(centroids[a], centroids[b])
        for a in range(len(centroids))
        for b in range(a + 1, len(centroids))
    )

    class_num = len(centroids)
    ordered_pairs = np.power(class_num, 2) - class_num
    # Guard the division when there are zero or one classes.
    denominator = 1 if ordered_pairs == 0 else ordered_pairs
    return 2 * pair_total / denominator
		file2_contents = file2.read().split('\n')
		f2contents = file2_contents[1].split(',') # index 0 contains feature headers

		data1 = []
		data2 = []
		for k in range(0, len(f1contents)):
			try:
				f1contents[k] = float(f1contents[k])
				data1.append(f1contents[k])
			except:
				pass
		for k in range(0, len(f2contents)):
			try:
				f2contents[k] = float(f2contents[k])
				data2.append(f2contents[k])
			except:
				pass

		indices = Distance.prune(data1, data2)
		#print("Euclidean: " + str(Distance.euclidean_distance(data1, data2, indices)))
		output.write(files[i] + "," + files[j] + "," + "euclidean" + "," + str(Distance.euclidean_distance(data1, data2, indices)) + "\n")
		#print("City: " + str(Distance.city_distance(data1, data2)))
		output.write(files[i] + "," + files[j] + "," + "city" + "," + str(Distance.city_distance(data1, data2)) + "\n")
		#print("Chebychev: " + str(Distance.chebychev_distance(data1, data2)))
		output.write(files[i] + "," + files[j] + "," + "chebychev" + "," + str(Distance.chebychev_distance(data1, data2)) + "\n")
		#print("Cosine: " + str(Distance.cosine_difference(data1, data2)))
		output.write(files[i] + "," + files[j] + "," + "cosine" + "," + str(Distance.cosine_difference(data1, data2)) + "\n")
		#print("Correlation: " + str(Distance.correlation_distance(data1, data2)))
		output.write(files[i] + "," + files[j] + "," + "correlation" + "," + str(Distance.correlation_distance(data1, data2)) + "\n")