forked from sheyma/eigen_decomp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
corr_fisher.py
92 lines (66 loc) · 2.38 KB
/
corr_fisher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
import numpy as np
import numexpr as ne
ne.set_num_threads(ne.ncores) # inclusive HyperThreading cores
import sys
sys.path.append(os.path.expanduser('~/devel/mapalign/mapalign'))
sys.path.append(os.path.expanduser('~/devel/hcp_corr'))
import embed
import hcp_util
def fisher_r2z(R):
    """Fisher r-to-z transform: z = arctanh(r), evaluated via numexpr."""
    Z = ne.evaluate('arctanh(R)')
    return Z
def fisher_z2r(Z):
    """Inverse Fisher transform: r = tanh(z), evaluated via numexpr.

    tanh(z) is mathematically identical to (exp(2z)-1)/(exp(2z)+1) but does
    not overflow: the old exp-based form produced inf/inf -> nan for large
    |z| (e.g. z from correlations near +/-1), whereas tanh saturates at
    +/-1 as it should.
    """
    return ne.evaluate('tanh(Z)')
# here we go ...
## parse command line arguments
# first arg is output prefix, e.g. /ptmp/sbayrak/fisher/fisher_
cliarg_out_prfx = sys.argv[1]
# the rest args are the subject path(s), e.g. /ptmp/sbayrak/hcp/*
cliarg_rest = sys.argv[2:]
# list of all subjects as numpy array
subject_list = np.array(cliarg_rest) # e.g. /ptmp/sbayrak/hcp/*
# number of scan files to load per subject
# NOTE(review): presumably the four HCP resting-state runs -- confirm
# against hcp_util.t_series
cnt_files = 4
# passed as N_cnt below; None apparently means "use all time points" --
# confirm in hcp_util.t_series
N_user = None
# N = number of subjects; used as the divisor for the z-average below
N = len(subject_list)
for i in range(0, N):
    subject = subject_list[i]
    print "do loop %d/%d, %s" % (i+1, N, subject)
    # load time-series matrix of the subject
    K = hcp_util.t_series(subject, cnt_files=cnt_files, N_cnt=N_user)
    # get upper-triangular of correlation matrix of time-series as 1D array
    # (1D upper triangle keeps memory at ~n*(n-1)/2 instead of n*n)
    K = hcp_util.corrcoef_upper(K)
    print "corrcoef data shape: ", K.shape
    # Fisher r to z transform on the correlation upper triangular
    # (correlations are averaged in z-space, then mapped back after the loop)
    K = fisher_r2z(K)
    # sum all Fisher transformed 1D arrays
    if i == 0:
        SUM = K
    else:
        # numexpr evaluates the string using the local names SUM and K
        SUM = ne.evaluate('SUM + K')
    # release the per-subject array before loading the next subject
    del K
print "loop done"
# get average
SUM = ne.evaluate('SUM / N')
# Fisher z to r transform on average , now this is back to correlation array
SUM = fisher_z2r(SUM)
# transform correlation array into similarity array
# (maps the correlation range [-1, 1] onto [0, 1], in place)
SUM += 1.0
SUM /= 2.0
# get full similarity matrix of correlations
# N is reused here: now the full matrix dimension recovered from the
# length of the upper-triangle vector, no longer the subject count
N = hcp_util.N_original(SUM)
# in-place reshape of the 1D upper triangle into an N x N matrix
SUM.resize([N,N])
# NOTE(review): return value ignored, so this presumably mirrors the upper
# triangle into the lower half in place -- confirm in hcp_util
hcp_util.upper_to_down(SUM)
print "SUM.shape", SUM.shape
print "do embed for correlation matrix:", SUM.shape
# Satra's embedding algorithm
# skip_checks/overwrite let embed work on this large matrix without
# validation copies; see embed.compute_diffusion_map for their semantics
embedding, result = embed.compute_diffusion_map(SUM, alpha=0, n_components=20,
                                                diffusion_time=0, skip_checks=True, overwrite=True)
# output prefix
out_prfx=cliarg_out_prfx
# output precision
out_prec="%g"
# write embedding coordinates, eigenvalues, and eigenvectors as TSV
np.savetxt(out_prfx + "embedding2.csv", embedding, fmt=out_prec, delimiter='\t', newline='\n')
np.savetxt(out_prfx + "lambdas2.csv", result['lambdas'], fmt=out_prec, delimiter='\t', newline='\n')
np.savetxt(out_prfx + "vectors2.csv", result['vectors'], fmt=out_prec, delimiter='\t', newline='\n')
print result['lambdas']