/
measures.py
executable file
·113 lines (92 loc) · 3.92 KB
/
measures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
# measures of distortion
import numpy as np
import core,utils
def measure(m, hyperplanes, pCentroids, pMeasure, distrib, dataset):
    """
    Dispatch to the distortion measure selected by ``m``.

    ``m == "mse"`` forwards every remaining argument to ``MSE`` and
    ``m == "entropy"`` forwards them to ``negEntropy``; the result of the
    selected function is returned unchanged.
    For any other value of ``m`` an error message is printed and None is
    returned.
    """
    forwarded = (hyperplanes, pCentroids, pMeasure, distrib, dataset)
    if m == "mse":
        return MSE(*forwarded)
    if m == "entropy":
        return negEntropy(*forwarded)
    # Unknown selector: report it and fall through to a None result.
    print("Error: the measure parameter is unknown")
    return None
def negEntropy(hyperplanes, pCentroids, pMeasure, distrib, dataset):
    """
    Return the opposite (negative) of the estimated entropy, in bits, of
    the hyperplane configuration ``hyperplanes``.

    The estimation is similar to that of MSE: ``pMeasure`` random points
    are generated with ``utils.f``, each one is mapped to its region with
    ``utils.findRegion``, and the points falling in a region returned by
    ``core.centroids`` are counted. With p_j the fraction of the counted
    points that fell in region j, the result is sum_j p_j * log2(p_j).

    Parameters:
        hyperplanes: the hyperplane configuration;
            ``len(hyperplanes[0]) - 1`` is given to ``utils.f`` as its
            first argument.
        pCentroids: forwarded to ``core.centroids`` (number of
            realisations used for determining the centroids).
        pMeasure: number of random points generated for the estimate.
        distrib, dataset: forwarded to ``utils.f`` (``distrib`` also to
            ``core.centroids``).

    Returns:
        A value <= 0 (0 when all counted points share one region), or nan
        when no generated point fell in a known region (including
        ``pMeasure == 0``), since the entropy is then undefined.
    """
    regionsWithCentroids = core.centroids(hyperplanes, pCentroids, distrib)
    counts = np.zeros(len(regionsWithCentroids))
    for _ in range(pMeasure):
        # generate point and find its region
        x = utils.f(len(hyperplanes[0]) - 1, distrib, dataset)
        r = utils.findRegion(x, hyperplanes)
        # match region with an already known one; points whose region is
        # not among the known ones are ignored
        for j in range(len(regionsWithCentroids)):
            if np.all(regionsWithCentroids[j, 0] == r):
                counts[j] += 1
                break
    numberOfPointsUsed = counts.sum()
    if numberOfPointsUsed == 0:
        return float("nan")
    # Normalise by the number of counted points only, so that the p_j sum
    # to 1 (the dimension of the points plays no role in an entropy).
    # Regions that received no point are dropped: their contribution is
    # the limit p * log2(p) -> 0, whereas evaluating 0 * log2(0) would
    # give nan and poison the whole sum.
    probabilities = counts[counts > 0] / numberOfPointsUsed
    return np.sum(probabilities * np.log2(probabilities))
def MSE(hyperplanes, pCentroids, pMeasure, distrib, dataset):
    """
    Returns MSE given the hyperplanes separating regions.

    ``pMeasure`` random points are generated with ``utils.f``; each one is
    mapped to its region with ``utils.findRegion`` and, when that region is
    one of those returned by ``core.centroids``, its squared distance to
    the region's centroid (``utils.squareDistance``) is accumulated.
    The accumulated error is divided by the length of a generated point
    and by the number of points that were actually used.

    Parameters:
        hyperplanes: the hyperplanes separating regions;
            ``len(hyperplanes[0]) - 1`` is given to ``utils.f`` as its
            first argument.
        pCentroids: number of realisations of f used for determining the
            centroids of each region.
        pMeasure: number of realisations used for computing the MSE.
        distrib, dataset: forwarded to ``utils.f`` (``distrib`` also to
            ``core.centroids``).

    Returns:
        The estimated MSE, or nan when no generated point fell in a region
        with a known centroid (including ``pMeasure == 0``). This is the
        value ``MSEforDirection`` yields for a direction in the same
        situation.
    """
    regionsWithCentroids = core.centroids(hyperplanes, pCentroids, distrib)
    error = 0.
    numberOfPointsUsed = 0
    for _ in range(pMeasure):
        x = utils.f(len(hyperplanes[0]) - 1, distrib, dataset)
        r = utils.findRegion(x, hyperplanes)
        # Only points whose region has a registered centroid contribute.
        for j in range(len(regionsWithCentroids)):
            if np.all(regionsWithCentroids[j, 0] == r):
                error += utils.squareDistance(x, regionsWithCentroids[j, 1])
                numberOfPointsUsed += 1
                break
    if numberOfPointsUsed == 0:
        # Nothing to average: avoid dividing by zero (and reading ``x``
        # when no point was generated at all).
        return float("nan")
    # ``x`` is necessarily bound here; normalise by its length, then by the
    # number of contributing points.
    error /= float(len(x))
    error /= float(numberOfPointsUsed)
    return error
def MSEforDirection(directions, pCentroids, pMeasure, distrib, dataset):
    """
    A more efficient way to compute MSE for different directions (for a
    specific update function).

    Every generated point is reused for all the directions instead of
    generating a fresh set of points per direction.

    Parameters:
        directions: sequence of hyperplane configurations, one per
            direction; ``len(directions[0][0]) - 1`` is given to
            ``utils.f`` as its first argument.
        pCentroids: forwarded to ``core.centroids`` for each direction.
        pMeasure: number of random points generated.
        distrib, dataset: forwarded to ``utils.f`` (``distrib`` also to
            ``core.centroids``).

    Returns:
        A numpy array with one MSE per direction. An entry is nan when no
        generated point fell in a known region of that direction; with
        ``pMeasure == 0`` every entry is nan.
    """
    directionsMSE = np.zeros(len(directions))
    numberOfPointsUsed = np.zeros(len(directions))
    # define regions and centroids
    regionsWithCentroids = [
        core.centroids(hyperplanes, pCentroids, distrib)
        for hyperplanes in directions
    ]
    # calculate error
    pointLength = None
    for _ in range(pMeasure):
        # pick a random point x, shared by all the directions
        x = utils.f(len(directions[0][0]) - 1, distrib, dataset)
        pointLength = len(x)
        for i, known in enumerate(regionsWithCentroids):
            r = utils.findRegion(x, directions[i])
            # Only points whose region has a registered centroid contribute.
            for j in range(len(known)):
                if np.all(known[j, 0] == r):
                    directionsMSE[i] += utils.squareDistance(x, known[j, 1])
                    numberOfPointsUsed[i] += 1.
                    break
    if pointLength is None:
        # No point was generated, so every MSE is undefined.
        return np.full(len(directions), np.nan)
    directionsMSE /= float(pointLength)
    # A direction without any contributing point gives 0/0 = nan here.
    directionsMSE /= numberOfPointsUsed
    return directionsMSE