# Implementation of t-SNE; for reference see (van der Maaten & Hinton, 2008).
# In addition to the standard gradient descent with adaptive gains, the original
# t-SNE objective is also optimized with ADAM gradient descent.
# author: Stefan Lam
import numpy as np
from time import time
from utils import profile, shannon_entropy, cond_probs, joint_average_P, joint_Q, pca, make_dir, plot
from datasets import Dataset
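
# The helper functions imported from utils are assumed, from their names and
# call sites in this file, to provide: profile (a timing decorator), cond_probs
# and shannon_entropy (perplexity-matched conditional probabilities via a
# per-point search), joint_average_P (the symmetrized joint distribution P),
# joint_Q (the low-dimensional Student-t affinities Q plus the unnormalized
# kernel), pca, make_dir, and plot.
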
class tsne:
    """
    t-SNE model; constructs an object holding the optimization parameters.
    """
    def __init__(self, d_components=2, initial_dims=30, initialization='PCA', perplexity=40, dof=1.,
                 early_exaggeration=4, random_state=None, data_name='', grad_method='gains', max_iter=1000,
                 initial_momentum=0.5, final_momentum=0.8, learning_rate=500.0):
        self.d_components = d_components          # dimension of the projection space
        self.initial_dims = initial_dims          # dimensions the data is reduced to before applying t-SNE
        self.initialization = initialization      # initialization method, defaults to PCA with initial_dims
        self.perplexity = perplexity              # target perplexity for the conditional probabilities
        self.early_exaggeration = early_exaggeration  # factor by which P is scaled during early iterations
        self.random_state = random_state
        self.max_iter = max_iter
        self.initial_momentum = initial_momentum
        self.final_momentum = final_momentum
        self.learning_rate = learning_rate
        self.grad_method = grad_method            # 'gains', 'ADAM' or 'SGD'
        self.data_name = data_name
        self.dof = dof                            # degrees of freedom of the Student-t kernel
    def grad_descent_gains(self, X, Y, P):
        '''
        Gradient descent with momentum and adaptive gains, as described in
        (van der Maaten & Hinton, 2008). Y is the initial solution.
        '''
        P = P * self.early_exaggeration
        (n, d) = X.shape
        cost = np.zeros(self.max_iter)
        min_gain = 0.01
        dY = np.zeros((n, self.d_components))     # gradient
        iY = np.zeros((n, self.d_components))     # update vector, used for momentum
        gains = np.ones((n, self.d_components))   # adaptive per-parameter gains
        mean_abs_dY = np.zeros((self.max_iter, self.d_components))
        t0 = time()
        for iter in range(self.max_iter):
            Q, num = joint_Q(Y, self.dof)
            Q = np.maximum(Q, 1e-12)              # floor Q for numerical stability, as in the ADAM variant
            PQ_diff = P - Q
            # gradient:
            for i in range(n):
                dY[i, :] = ((2.*self.dof+2.)/self.dof)*np.sum(np.tile(PQ_diff[:, i] * num[:, i], (self.d_components, 1)).T * (Y[i, :] - Y), 0)
            # Perform the update
            if iter < 20:
                momentum = self.initial_momentum
            else:
                momentum = self.final_momentum
            # individual gains: grow when the gradient sign disagrees with the
            # current update direction, shrink when they agree
            gains = (gains + 0.2) * ((dY > 0.) != (iY > 0.)) + \
                    (gains * 0.8) * ((dY > 0.) == (iY > 0.))
            gains[gains < min_gain] = min_gain
            iY = momentum * iY - self.learning_rate * (gains * dY)
            Y = Y + iY
            #Y = Y - np.tile(np.mean(Y, 0), (n, 1))
            # Compute cost function (KL divergence)
            cost[iter] = np.sum(P * np.log(P / Q))
            mean_abs_dY[iter, :] = np.mean(np.abs(dY), axis=0)
            if (iter + 1) % 10 == 0:
                C = np.sum(P * np.log(P / Q))
                print("Iteration: %d cost: %.4f Mean absolute gradient value: %s elapsed time: %.2f" % (iter + 1, C, str(mean_abs_dY[iter, :]), time() - t0))
            # Stop the early exaggeration
            if iter == 100:
                P = P / self.early_exaggeration
        np.savetxt('Models/probs/COIL20dim2gains.csv', Q, delimiter=',')
        return Y, cost, mean_abs_dY
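
    # The per-point gradient used in all three optimizers is, for a Student-t
    # kernel with dof degrees of freedom (a sketch; `num` is assumed to be the
    # unnormalized kernel returned by joint_Q):
    #
    #   dC/dy_i = ((2*dof + 2)/dof) * sum_j (p_ij - q_ij) * num_ij * (y_i - y_j)
    #
    # For dof = 1 this reduces to the familiar t-SNE gradient
    #   4 * sum_j (p_ij - q_ij) * (1 + ||y_i - y_j||^2)^{-1} * (y_i - y_j)
    # from (van der Maaten & Hinton, 2008).
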
    def grad_descent_ADAM(self, X, Y, P):
        '''
        Gradient descent according to ADAM. Y is the initial solution.
        '''
        P = P * self.early_exaggeration
        (n, d) = X.shape
        cost = np.zeros(self.max_iter)
        dY = np.zeros((n, self.d_components))     # gradient
        mean_abs_dY = np.zeros((self.max_iter, self.d_components))
        # initialize the ADAM parameters
        alpha = self.learning_rate
        beta_1 = 0.85
        beta_2 = 0.9
        epsilon = 1e-8
        m_t = np.zeros((n, self.d_components))    # first moment
        v_t = np.zeros((n, self.d_components))    # second moment
        t0 = time()
        for iter in range(self.max_iter):
            t = iter + 1
            Q, num = joint_Q(Y, self.dof)
            Q = np.maximum(Q, 1e-12)
            PQ_diff = P - Q
            # gradient:
            for i in range(n):
                dY[i, :] = ((2.*self.dof+2.)/self.dof) * np.sum(np.tile(PQ_diff[:, i] * num[:, i], (self.d_components, 1)).T * (Y[i, :] - Y), 0)
            m_t = beta_1 * m_t + (1 - beta_1) * dY
            v_t = beta_2 * v_t + (1 - beta_2) * (dY * dY)
            m_corr = m_t / (1 - (beta_1 ** t))    # bias-corrected first moment
            v_corr = v_t / (1 - (beta_2 ** t))    # bias-corrected second moment
            Y = Y - (alpha * m_corr) / (np.sqrt(v_corr) + epsilon)
            # Compute cost function (KL divergence)
            cost[iter] = np.sum(P * np.log(P / Q))
            mean_abs_dY[iter, :] = np.mean(np.abs(dY), axis=0)
            if (iter + 1) % 10 == 0:
                C = np.sum(P * np.log(P / Q))
                print("Iteration: %d cost: %.4f Mean absolute gradient value: %s elapsed time: %.2f" % (iter + 1, C, str(mean_abs_dY[iter, :]), time() - t0))
            # Stop the early exaggeration
            if iter == 100:
                P = P / self.early_exaggeration
        np.savetxt('Models/probs/COIL20dim2ADAM.csv', Q, delimiter=',')
        return Y, cost, mean_abs_dY
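
    # The ADAM update above follows (Kingma & Ba, 2015), with bias-corrected moments:
    #   m_t = beta_1 * m_{t-1} + (1 - beta_1) * g_t
    #   v_t = beta_2 * v_{t-1} + (1 - beta_2) * g_t^2
    #   y  <- y - alpha * (m_t / (1 - beta_1^t)) / (sqrt(v_t / (1 - beta_2^t)) + epsilon)
    # Note that beta_1 = 0.85 and beta_2 = 0.9 deviate from the common defaults
    # of 0.9 and 0.999.
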
    def grad_descent(self, X, Y, P):
        '''
        Regular gradient descent. Y is the initial solution.
        '''
        P = P * self.early_exaggeration
        (n, d) = X.shape
        cost = np.zeros(self.max_iter)
        dY = np.zeros((n, self.d_components))     # gradient
        mean_abs_dY = np.zeros((self.max_iter, self.d_components))
        t0 = time()
        for iter in range(self.max_iter):
            Q, num = joint_Q(Y, self.dof)
            Q = np.maximum(Q, 1e-12)              # floor Q for numerical stability, as in the ADAM variant
            PQ_diff = P - Q
            # gradient:
            for i in range(n):
                dY[i, :] = ((2.*self.dof+2.)/self.dof)*np.sum(np.tile(PQ_diff[:, i] * num[:, i], (self.d_components, 1)).T * (Y[i, :] - Y), 0)
            # Perform the update
            Y = Y - self.learning_rate * dY
            #Y = Y - np.tile(np.mean(Y, 0), (n, 1))
            # Compute cost function (KL divergence)
            cost[iter] = np.sum(P * np.log(P / Q))
            mean_abs_dY[iter, :] = np.mean(np.abs(dY), axis=0)
            if (iter + 1) % 10 == 0:
                C = np.sum(P * np.log(P / Q))
                print("Iteration: %d cost: %.4f Mean absolute gradient value: %s elapsed time: %.2f" % (iter + 1, C, str(mean_abs_dY[iter, :]), time() - t0))
            # Stop the early exaggeration
            if iter == 100:
                P = P / self.early_exaggeration
        return Y, cost, mean_abs_dY
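
    # All three optimizers use early exaggeration: P is scaled up by
    # self.early_exaggeration for the first 100 iterations so that clusters form
    # tight, well-separated groups early in the optimization, and is scaled back
    # down afterwards (van der Maaten & Hinton, 2008).
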
    @profile
    def transform(self, X):
        """
        Reduces the dimensionality of X with t-SNE using the selected gradient descent method.
        """
        print("Start transforming X...")
        begin = time()
        if self.random_state is not None:
            print("transforming X with random state: " + str(self.random_state))
            np.random.seed(self.random_state)
        else:
            print("No random state specified...")
        if self.initialization == "PCA":
            print("First reducing dimensions of X with PCA to %d dimensions" % (self.initial_dims))
            X, _ = pca(X, self.initial_dims)
        (n, d) = X.shape
        Y = np.random.randn(n, self.d_components)  # initialize a random solution
        cond_P, _ = cond_probs(X, perplexity=self.perplexity)
        P = joint_average_P(cond_P)
        #np.savetxt('results/' + self.data_name + 'Probabilities' + self.grad_method + '.csv', P, delimiter=',')
        print("Start gradient descent...")
        t0 = time()
        if self.grad_method == 'ADAM':
            Y, cost, grad_value = self.grad_descent_ADAM(X, Y, P)
        elif self.grad_method == 'gains':
            Y, cost, grad_value = self.grad_descent_gains(X, Y, P)
        elif self.grad_method == 'SGD':
            Y, cost, grad_value = self.grad_descent(X, Y, P)
        else:
            raise ValueError("Unknown grad_method: " + str(self.grad_method))
        #np.savetxt('results/' + self.data_name + '/' + self.grad_method + 'cost' + str(self.d_components) + '.csv', cost, delimiter=',')
        #np.savetxt('results/' + self.data_name + '/' + self.grad_method + 'Y' + str(self.d_components) + '.csv', Y, delimiter=',')
        print("Gradient descent took %.4f seconds" % (time() - t0))
        return Y, cost, grad_value
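
    # transform() follows the standard t-SNE pipeline: optional PCA down to
    # initial_dims, a per-point search for the Gaussian bandwidth matching the
    # target perplexity (assumed to happen inside cond_probs), and symmetrization
    # of the conditional probabilities, p_ij = (p_{j|i} + p_{i|j}) / (2n),
    # assumed to happen inside joint_average_P.
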
if __name__ == '__main__':
    '''
    Main script to run t-SNE on the datasets.
    '''
    seed = 0
    dataset = Dataset(seed)
    d_components = [2]
    data_name = 'COIL20'
    grad_method = 'gains'
    n_train = 960
    X, y, X_train, y_train, X_test, y_test = dataset.get_data(data_name, n_train, 10000)
    #X, y, X_train, y_train, X_test, y_test = dataset.get_coil20_data()
    for d in d_components:
        if grad_method == 'ADAM':
            model = tsne(random_state=0, initialization='PCA', initial_dims=30, grad_method='ADAM', perplexity=40,
                         max_iter=1000, d_components=d, learning_rate=0.1)
        elif grad_method == 'gains':
            model = tsne(random_state=0, initialization='PCA', initial_dims=30, grad_method='gains', perplexity=40,
                         max_iter=1000, d_components=d, learning_rate=100)
        elif grad_method == 'SGD':
            model = tsne(random_state=0, initialization='PCA', initial_dims=30, grad_method='SGD', perplexity=40,
                         max_iter=1000, d_components=d, learning_rate=100)
        file_path = 'Models/tSNE/' + data_name + str(n_train) + 'dim' + str(d) + grad_method
        make_dir(file_path)
        Y, cost, grad_value = model.transform(X_train)
        np.savetxt(file_path + 'Y2.csv', Y, delimiter=',')
        np.savetxt(file_path + 'cost.csv', cost, delimiter=',')
        np.savetxt(file_path + 'grad_value.csv', grad_value, delimiter=',')
        #plot(Y, y_train, cmap='Paired', s=1, linewidth=0.1)