-
Notifications
You must be signed in to change notification settings - Fork 0
/
distance_measures.py
59 lines (51 loc) · 2.06 KB
/
distance_measures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
'''
Functions for computing the Variation of Information and the Bhattacharyya distance between two data matrices assumed to be drawn from multivariate Gaussian distributions
Created on 21 Feb 2018
@author: bobaseb (Sebastian Bobadilla-Suarez)
*requires numpy & scikit-learn
- both functions can optionally apply Ledoit-Wolf shrinkage to the covariance estimates (pass reg=1)
'''
import numpy as np
from sklearn.covariance import ledoit_wolf
def var_info(mat1, mat2, reg=0):
    """Variation of Information between two samples assumed to be Gaussian.

    Both inputs are laid out as (observations x features) and must have the
    same number of rows; row ``i`` of ``mat1`` is paired with row ``i`` of
    ``mat2``.  A 1-D input is treated as a single feature.

    Parameters
    ----------
    mat1, mat2 : array_like
        Data matrices with observations in rows and features in columns.
    reg : int, optional
        ``1`` estimates every covariance with Ledoit-Wolf shrinkage; any
        other value uses the plain sample covariance.

    Returns
    -------
    float
        ``H(X, Y) - I(X; Y)`` where ``I = H(X) + H(Y) - H(X, Y)`` and each
        entropy term is ``0.5 * n * logdet(2 * pi * e * Sigma)`` with ``n``
        the number of observations.  A singular covariance produces a
        non-finite result because its log-determinant is ``-inf``.
    """
    mat1 = np.asarray(mat1)
    mat2 = np.asarray(mat2)
    # A flat vector is n observations of one feature, not one observation.
    if mat1.ndim == 1:
        mat1 = mat1[:, np.newaxis]
    if mat2.ndim == 1:
        mat2 = mat2[:, np.newaxis]

    n = mat1.shape[0]
    # Side-by-side stacking gives the joint variable (X, Y) per observation.
    joint = np.hstack([mat1, mat2])

    def _entropy(data):
        if reg == 1:
            cov = ledoit_wolf(data)[0]
        else:
            # rowvar=False: rows are observations, matching ledoit_wolf.
            cov = np.cov(data, rowvar=False)
        # A single feature yields a 0-d covariance; slogdet needs 2-D.
        cov = np.atleast_2d(cov)
        # slogdet returns (sign, logdet); only the logdet may be scaled.
        _, logdet = np.linalg.slogdet(cov * 2 * np.exp(1) * np.pi)
        return 0.5 * n * logdet

    h_joint = _entropy(joint)
    h_1 = _entropy(mat1)
    h_2 = _entropy(mat2)

    mutual_info = h_1 + h_2 - h_joint
    return float(h_joint - mutual_info)
def bhdist(mu1, mu2, mat1, mat2, reg=0):
    """Bhattacharyya distance between two samples assumed to be Gaussian.

    Parameters
    ----------
    mu1, mu2 : array_like
        Mean vectors of the two distributions (one entry per feature).
        Any shape is accepted; they are flattened before use.
    mat1, mat2 : array_like
        Data matrices laid out as (features x observations), i.e. each
        column is one observation.
    reg : int, optional
        ``1`` estimates the covariances with Ledoit-Wolf shrinkage; any
        other value uses the plain sample covariance.

    Returns
    -------
    float
        ``d' S^-1 d / 8 + ln det(S) / 2 - (ln det(S1) + ln det(S2)) / 4``
        with ``d = mu1 - mu2`` and ``S = (S1 + S2) / 2``.
    """
    diff = (np.asarray(mu1, dtype=float) - np.asarray(mu2, dtype=float)).ravel()

    if reg == 1:
        # ledoit_wolf expects (observations x features), the transpose of
        # this function's layout; without it the two branches disagree.
        cov1 = ledoit_wolf(np.atleast_2d(mat1).T)[0]
        cov2 = ledoit_wolf(np.atleast_2d(mat2).T)[0]
    else:
        # np.cov defaults to rowvar=True: rows are features.
        cov1 = np.cov(mat1)
        cov2 = np.cov(mat2)
    # A single feature yields a 0-d covariance; the linalg calls need 2-D.
    cov1 = np.atleast_2d(cov1)
    cov2 = np.atleast_2d(cov2)
    cov_mean = (cov1 + cov2) / 2

    # Mahalanobis-style term; solving avoids forming an explicit inverse.
    term1 = np.dot(diff, np.linalg.solve(cov_mean, diff)) / 8

    # Log-determinants via slogdet to avoid overflow/underflow in det().
    logdet1 = np.linalg.slogdet(cov1)[1]
    logdet2 = np.linalg.slogdet(cov2)[1]
    logdet_mean = np.linalg.slogdet(cov_mean)[1]
    term2 = logdet_mean / 2 - (logdet1 + logdet2) / 4

    return float(term1 + term2)