-
Notifications
You must be signed in to change notification settings - Fork 0
/
StackedCAE.py
152 lines (114 loc) · 5.78 KB
/
StackedCAE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import theano
import numpy
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.sandbox.neighbours import images2neibs
from theano.sandbox.neighbours import neibs2images
from ConvAE import CAE
from Autoencoder import AE
class SCAE(object):
    """Convolution + (optional) folding + k-max pooling layer with a dense
    auto-encoder on top, trained by reconstructing its own input.

    The forward path is: full convolution -> k-max pooling -> bias + tanh ->
    flatten -> ``AE`` hidden layer.  The reverse path undoes each step:
    ``AE`` reconstruction -> reshape -> unpooling -> valid convolution with
    the flipped/transposed filters -> tanh.

    NOTE(review): the class name presumably stands for "stacked convolutional
    auto-encoder"; confirm against the rest of the project.
    """

    def __init__(self, rng, input, filter_shape, image_shape, factor, s, k_Top=5, do_fold=True):
        """Create the shared parameters and bind the symbolic input.

        :param rng: random generator exposing ``uniform(low, high, size)``;
            it is also handed to the ``AE`` built in ``get_hidden_values``.
        :param input: symbolic input tensor, or ``None`` to create a fresh
            symbolic 4-D double tensor named ``'input'``.
        :param filter_shape: 4-element filter shape; element 0 is the number
            of output feature maps (one bias is allocated per entry).
        :param image_shape: accepted for interface compatibility but not
            used: ``self.imshp`` is always ``None`` so the convolution does
            not assume a fixed image shape.
        :param factor: multiplied with ``s`` to derive the pooling size.
        :param s: see ``factor``; ``k = max(k_Top, ceil(factor * s))``.
        :param k_Top: lower bound on the number of values kept by pooling.
        :param do_fold: whether ``Output`` folds the convolution result
            before pooling.
        """
        self.kshp = filter_shape
        self.imshp = None
        # Filter shape as seen by the reverse (deconvolution) pass: the two
        # leading axes are swapped, matching the dimshuffle of W_tilde below.
        self.i_kshp = (self.kshp[1], self.kshp[0], self.kshp[2], self.kshp[3])
        self.i_imshp = None
        self.do_fold = do_fold
        self.k_Top = k_Top
        self.factor = factor
        self.s = s
        self.rng = rng

        # Uniform initialisation bounded by sqrt(6 / (fan_in + fan_out)).
        fan_in = numpy.prod(filter_shape[1:])
        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX),
            name='conv_W',
            borrow=True)

        # The bias is a 1D tensor -- one bias per output feature map.
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='conv_b', borrow=True)
        # Single scalar bias used by the reconstruction path.
        self.c = theano.shared(value=0.0, name='deconv_c')
        # Filters for the reverse pass: spatially flipped, first two axes swapped.
        self.W_tilde = self.W[:, :, ::-1, ::-1].dimshuffle(1, 0, 2, 3)

        if input is None:
            # The convolution and the cost (summed over axes 1, 2, 3) need a
            # 4-D tensor, so the default placeholder must be 4-D as well.
            self.x = T.dtensor4(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.c]

    def _pool_size(self):
        """Return the symbolic int32 pooling size ``max(k_Top, ceil(factor * s))``."""
        return T.cast(T.max((self.k_Top, T.ceil(self.factor * self.s))), 'int32')

    def Fold(self, conv_out, ds=(2, 1)):
        """Sum the values inside each ``ds`` neighbourhood of ``conv_out``.

        With the default ``ds=(2, 1)`` vertically adjacent rows are added
        pairwise, halving the third axis.

        :param conv_out: symbolic 4-D tensor.
        :param ds: neighbourhood shape ``(rows, cols)``.
        :return: symbolic 4-D tensor whose last two axes are the input's
            divided (integer division) by ``ds``.
        """
        imgs = images2neibs(conv_out, T.as_tensor_variable(ds), mode='ignore_borders')
        orig = conv_out.shape
        # Explicit floor division: the result does not depend on the Python
        # version or on Theano's integer-division configuration.
        shp = (orig[0], orig[1],
               T.cast(orig[2] // ds[0], 'int32'),
               T.cast(orig[3] // ds[1], 'int32'))
        return T.reshape(T.sum(imgs, axis=-1), shp)

    def kmaxPool(self, conv_out, pool_shape, k):
        """Keep the ``k`` largest values of every row, in their original order.

        The column indices that were kept are stored in
        ``self.k_max_indices`` so that ``unpooling`` can put values back.

        :param conv_out: symbolic 4-D tensor to pool.
        :param pool_shape: its shape as ``(n0, n1, d, size)``.
        :param k: number of values to keep along the last axis.
        :return: symbolic tensor of shape ``(n0, n1, d, k)``.
        """
        n0, n1, d, size = pool_shape
        # One matrix row per (n0, n1, d) position, ``size`` columns each.
        rows = images2neibs(conv_out, T.as_tensor_variable((1, size)))
        # Argsort of the negated values ranks columns from largest to smallest.
        ranked = T.argsort(T.mul(rows, -1))
        # Sorting the selected indices restores their original ordering.
        self.k_max_indices = T.sort(ranked[:, :k])
        row_ids = T.arange(d * n1 * n0).reshape((d * n1 * n0, 1))
        return rows[row_ids, self.k_max_indices].reshape((n0, n1, d, k))

    def unpooling(self, Y_4D, Z, X_4D):
        """Reverse k-max pooling by scattering values into a zero tensor.

        :param Y_4D: pooled values (4-D).
        :param Z: per-row column indices, as stored by ``kmaxPool``.
        :param X_4D: tensor whose shape the result takes; its values are
            not used.
        :return: tensor shaped like ``X_4D`` that is zero everywhere except
            at the positions ``Z``, which hold the values of ``Y_4D``.
        """
        Y = images2neibs(Y_4D, T.as_tensor_variable((1, Y_4D.shape[3])))
        X = images2neibs(X_4D, T.as_tensor_variable((1, X_4D.shape[3])))
        row_ids = T.arange(X.shape[0]).reshape((X.shape[0], 1))
        scattered = T.set_subtensor(T.zeros_like(X)[row_ids, Z], Y)
        return scattered.reshape(X_4D.shape)

    def Output(self):
        """Return the layer activation for ``self.x``.

        Full convolution, optional folding (``self.do_fold``), k-max
        pooling, then bias and ``tanh``.
        """
        conv_out = conv.conv2d(input=self.x, filters=self.W, border_mode='full',
                               filter_shape=self.kshp, image_shape=self.imshp)
        pool_input = self.Fold(conv_out) if self.do_fold else conv_out
        pooled_out = self.kmaxPool(pool_input, pool_input.shape, self._pool_size())
        return T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    def get_hidden_values(self, input):
        """Encode ``input`` down to the hidden code of the dense auto-encoder.

        Side effects: stores ``self.conv_out``, ``self.k_max_indices`` (via
        ``kmaxPool``), ``self.shape`` and a newly built
        ``self.fully_connected``; ``self.params`` is reset to the
        convolution parameters followed by that auto-encoder's parameters.

        NOTE(review): unlike ``Output`` this path never folds, regardless of
        ``self.do_fold``.  ``n_visible`` is computed with a hard-coded 25 and
        with ``k_Top`` rather than the effective pooling size -- confirm
        these match the data the model is used with.

        :param input: symbolic 4-D input tensor.
        :return: hidden representation from ``AE.get_hidden_values``.
        """
        # Convolve input feature maps with filters.
        self.conv_out = conv.conv2d(input=input, filters=self.W, border_mode='full',
                                    filter_shape=self.kshp, image_shape=self.imshp)
        pool = self.kmaxPool(self.conv_out, self.conv_out.shape, self._pool_size())
        output = T.tanh(pool + self.b.dimshuffle('x', 0, 'x', 'x'))
        # Remembered so the reconstruction can restore the 4-D layout.
        self.shape = output.shape
        hidden_input = output.flatten(2)
        self.fully_connected = AE(self.rng, input=hidden_input,
                                  n_visible=self.kshp[0] * 25 * self.k_Top,
                                  n_hidden=60)
        # Rebuild (in place) instead of extending: extending on every call
        # left parameters of discarded auto-encoders in the list, and the
        # gradient of the cost with respect to those is undefined.
        self.params[:] = [self.W, self.b, self.c] + list(self.fully_connected.params)
        return self.fully_connected.get_hidden_values(hidden_input)

    def get_reconstructed_input(self, hidden, start):
        """Decode ``hidden`` back to input space.

        Must be called after ``get_hidden_values``, which sets
        ``self.fully_connected``, ``self.shape`` and ``self.k_max_indices``.

        :param hidden: hidden code produced by ``get_hidden_values``.
        :param start: tensor whose shape the unpooled tensor takes
            (``get_cost_updates`` passes ``self.conv_out``).
        :return: ``tanh`` of the deconvolved tensor plus the scalar bias.
        """
        reconstruct_AE = self.fully_connected.get_reconstructed_input(hidden)
        hidden_NN = reconstruct_AE.reshape(self.shape)
        unpool = self.unpooling(hidden_NN, self.k_max_indices, start)
        deconv = conv.conv2d(input=unpool, filters=self.W_tilde,
                             filter_shape=self.i_kshp, image_shape=None)
        return T.tanh(deconv + self.c.dimshuffle('x', 'x', 'x', 'x'))

    def get_cost_updates(self, learning_rate):
        """Build the reconstruction cost and the AdaGrad parameter updates.

        The cost is the squared reconstruction error summed over axes
        1, 2, 3 and averaged over axis 0.

        :param learning_rate: AdaGrad base step size.
        :return: ``(cost, updates)`` where ``updates`` is a list of
            ``(shared_variable, new_value)`` pairs covering every parameter
            and its squared-gradient accumulator.
        """
        y = self.get_hidden_values(self.x)
        z = self.get_reconstructed_input(y, self.conv_out)
        L = T.sum((self.x - z) ** 2, axis=(1, 2, 3))
        cost = T.mean(L)

        gparams = T.grad(cost, self.params)
        # Keeps the step finite where the accumulated squared gradient is zero.
        rho = 1e-7
        accumulators = [
            theano.shared(value=numpy.zeros_like(param.get_value()),
                          name='AdaGrad_%s' % param.name,
                          borrow=True)
            for param in self.params
        ]

        updates = []
        for param_i, grad_i, acc in zip(self.params, gparams, accumulators):
            acc_new = T.add(acc, T.sqr(grad_i))
            step = learning_rate * grad_i / (T.sqrt(acc_new) + rho)
            updates.append((param_i, param_i - step))
            updates.append((acc, acc_new))
        return (cost, updates)