-
Notifications
You must be signed in to change notification settings - Fork 0
/
phased_lstm.py
248 lines (171 loc) · 10.3 KB
/
phased_lstm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import tensorflow as tf
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.rnn.python.ops import core_rnn_cell
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.math_ops import sigmoid
from tensorflow.python.ops.math_ops import tanh
_linear = core_rnn_cell._linear
def random_exp_initializer(minval=0, maxval=None, seed=None,
                           dtype=dtypes.float32):
    """Return an initializer drawing exp(U[minval, maxval)) samples.

    Sampling uniformly and exponentiating gives values whose logarithm is
    uniform over [minval, maxval) — used below to initialize the periods
    `tau` across several time scales.

    Args:
        minval: Python scalar or scalar tensor; lower bound of the uniform
            range the exponent is drawn from.
        maxval: Python scalar or scalar tensor; upper bound of that range.
            Defaults to 1 for float types (random_uniform's own default).
        seed: Optional Python integer used to seed the RNG.
        dtype: Data type of the generated values.

    Returns:
        A callable with the TF initializer signature
        ``(shape, dtype, partition_info)``.
    """
    def _initializer(shape, dtype=dtype, partition_info=None):
        # Uniform exponent first, then exponentiate element-wise.
        uniform_sample = random_ops.random_uniform(shape,
                                                   minval,
                                                   maxval,
                                                   dtype,
                                                   seed=seed)
        return tf.exp(uniform_sample)
    return _initializer
# Compute the time gate's phase parameter phi
def phi(times, s, tau):
    """Phase of each unit's rhythm: ((times - s) mod tau) / tau, in [0, 1).

    NOTE(review): the commented-out variant in the original,
    ``tf.mod(tf.mod(times - s, tau) + tau, tau)``, is the defensive form for
    negative ``times - s``; this version assumes tf.mod already follows the
    divisor's sign (Python semantics) — confirm for the TF version in use.
    """
    elapsed = tf.mod(times - s, tau)
    return tf.div(elapsed, tau)
# Compute the time gate's output from phi
def time_gate_fast_2(phase, r_on, leak_rate, training_phase):
    """Piecewise-linear time gate built from nested `tf.where` selections.

    Rising edge (2*phase/r_on) while phase < r_on/2, falling edge
    (2 - 2*phase/r_on) while r_on/2 <= phase < r_on, and a leaky closed
    state (leak_rate * phase) otherwise.  The leak is zeroed outside of
    training.
    """
    if not training_phase:
        leak_rate = 0.0
    rising = tf.less(phase, (r_on * 0.5))
    falling = tf.logical_and(tf.less(phase, r_on), tf.logical_not(rising))
    # Innermost select: falling edge vs. leaky closed state.
    closed_or_falling = tf.where(falling,
                                 2. - 2. * (phase / r_on),
                                 leak_rate * phase)
    # Outermost select: rising edge wins when phase is in the first half.
    return tf.where(rising, 2 * phase / r_on, closed_or_falling)
# Second (alternative) implementation of the same gate
def time_gate_fast(phase, r_on, leak_rate, training_phase):
    """Piecewise-linear time gate computed as a sum of masked branches.

    Same gate as `time_gate_fast_2`, expressed with 0/1 indicator masks:
    rising edge for phase <= r_on/2, falling edge for r_on/2 < phase < r_on,
    leak (disabled outside training) for phase >= r_on.  Exactly one mask is
    nonzero per element, so the sum selects a single branch.
    """
    if not training_phase:
        leak_rate = 0.0
    half_open = 0.5 * r_on
    mask_up = tf.cast(tf.less_equal(phase, half_open), dtype='float32')
    mask_down = tf.cast(tf.logical_and(tf.less(half_open, phase),
                                       tf.less(phase, r_on)),
                        dtype='float32')
    mask_closed = tf.cast(tf.greater_equal(phase, r_on), dtype='float32')
    gate = mask_up * (2.0 * phase / r_on)
    gate = gate + mask_down * (2.0 - 2.0 * phase / r_on)
    gate = gate + mask_closed * (leak_rate * phase)
    return gate
class PhasedLSTMCell(RNNCell):
    """Phased LSTM cell (P-LSTM).

    A (peephole) LSTM extended with a learned, periodic "khronos" time
    gate ``kappa``.  Each unit learns a period ``tau``, a phase shift
    ``s`` and an open ratio ``r_on``; while the gate is closed the cell
    and hidden states are carried over almost unchanged (a small leak is
    applied during training only), which lets the cell handle
    irregularly-timed inputs.

    `__call__` expects `inputs` of shape [batch, 2]: column 0 is the
    scalar feature x_t, column 1 the timestamp t.
    """

    def __init__(self,
                 num_units,  # number of hidden units (= cell state size)
                 use_peepholes=True,
                 training_phase=True,
                 leak_rate=0.001,
                 r_on_init=0.05,
                 tau_init=6.,  # upper bound for the random init of the learned period tau
                 activation=tanh):
        # Number of units in the hidden layer and in the cell state.
        self._num_units = num_units
        # Nonlinearity for the candidate input and the new cell state.
        self._activation = activation
        # Whether the cell state feeds the gates via peephole weights.
        self._use_peepholes = use_peepholes
        # Leak of the closed time gate; zeroed outside of training
        # (see time_gate_fast).
        self._leak_rate = leak_rate  # only during training
        # Training-phase flag controlling whether the leak is active.
        self._training_phase = training_phase
        # Initial fraction of the period during which the gate is open.
        self.r_on_init = r_on_init
        # Initial (maximum) period length for the learned tau.
        self.tau_init = tau_init

    @property
    def state_size(self):
        # State is the pair (cell state, hidden state), num_units each.
        return self._num_units, self._num_units

    @property
    def output_size(self):
        # Output is the hidden state.
        return self._num_units

    def __call__(self,
                 inputs,  # [batch, 2]; col 0 = feature x, col 1 = timestamp t
                 state,   # (c, h) tuple, or one tensor holding both concatenated
                 scope=None):
        """
        Phased long short-term memory cell (P-LSTM).
        """
        with vs.variable_scope(scope or type(self).__name__):
            # Parameters of gates are concatenated into one multiply for efficiency.
            # The incoming state is either a (c, h) tuple or a single
            # concatenated tensor that must be split along axis 1.
            # BUG FIX: the original tested `state is tuple`, which compares
            # the state object's identity with the `tuple` TYPE and is
            # always False, so tuple states wrongly fell through to the
            # split branch; isinstance() is the correct check.
            if isinstance(state, tuple):
                c_prev, h_prev = state
            else:
                c_prev, h_prev = array_ops.split(value=state, num_or_size_splits=2, axis=1)
            # (2, batch_size, seq_len)
            # NB: here we explicitly give t as input.
            # Column 0 of `inputs` is the feature; keep a trailing feature
            # dimension of size 1 for the matmul below.
            x = tf.reshape(inputs[:, 0],
                           (-1, 1))
            # Take the last timestamp in the batch as the current time.
            t = inputs[:, 1][-1]  # Now we only accept one id. We have a batch so it's a bit more complex.
            # maybe the information should come from the outside. To be defined later.
            # One linear map producing all four gate pre-activations.
            concat = _linear([x, h_prev], 4 * self._num_units, True)
            # Only linear pre-activations here: the peephole contributions
            # (if enabled) are added before the nonlinearities below.
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)
            dtype = inputs.dtype
            # tau, r_on and s are learned PER UNIT (one value per hidden /
            # cell neuron), not per time step.
            tau = vs.get_variable('tau',
                                  shape=[self._num_units],
                                  # log-uniform init spreads periods over
                                  # several time scales
                                  initializer=random_exp_initializer(0,
                                                                     self.tau_init),
                                  dtype=dtype)
            r_on = vs.get_variable('r_on',
                                   shape=[self._num_units],
                                   initializer=init_ops.constant_initializer(self.r_on_init),
                                   dtype=dtype)
            s = vs.get_variable('s',
                                shape=[self._num_units],
                                # phase shift initialized uniformly within
                                # each unit's own initial period
                                initializer=init_ops.random_uniform_initializer(0.,
                                                                                tau.initialized_value()),
                                dtype=dtype)
            # Broadcast the scalar timestamp across all hidden units.
            times = tf.tile(tf.reshape(t, [-1, 1]),
                            [1, self._num_units])
            phase = phi(times, s, tau)  # element-wise calculation
            kappa = time_gate_fast(phase, r_on, self._leak_rate, self._training_phase)
            # With peepholes, the previous cell state feeds the input and
            # forget gate pre-activations (and, below, the candidate new
            # cell state feeds the output gate).
            if self._use_peepholes:
                w_i_peephole = vs.get_variable('W_I_peephole', shape=[self._num_units], dtype=dtype)
                w_f_peephole = vs.get_variable('W_F_peephole', shape=[self._num_units], dtype=dtype)
                w_o_peephole = vs.get_variable('W_O_peephole', shape=[self._num_units], dtype=dtype)
                f += w_f_peephole * c_prev
                i += w_i_peephole * c_prev
            new_c_tilde = sigmoid(f) * c_prev + sigmoid(i) * self._activation(j)
            # Output-gate peephole reads c_tilde, not the khronos-gated c:
            # see the author's note quoted below.
            if self._use_peepholes:
                o += w_o_peephole * new_c_tilde
            new_h_tilde = sigmoid(o) * self._activation(new_c_tilde)
            """
            Hi all,
            Yes, Philippe, you are correct in that Equation 4 should reference c_tilde and not c.
            I can add a point to the paper to mention that, and will update Figure 1 so the line is
            correctly drawn to c_tilde instead. The intuition here is that the gates should be blind
            to the effect of the khronos gate; input, forget and output gate should all operate as if
            the cell were a normal LSTM cell, while the khronos gate allows it to either operate or
            not operate (and then linearly interpolates between these two states). If the output gate
            is influenced by the khronos gate (if the peepholes reference c instead of c_tilde), then
            the PLSTM would no longer be a gated LSTM cell, but somehow be self-dependent on the time gate's actual operation.
            I think everyone's right in that it wouldn't influence much -- but it should be updated in
            the paper. Thanks very much for pointing out the issue, Philippe!
            -Danny"""
            # Apply Khronos gate: linearly interpolate between the updated
            # states and the previous states according to kappa.
            new_h = kappa * new_h_tilde + (1 - kappa) * h_prev
            new_c = kappa * new_c_tilde + (1 - kappa) * c_prev
            new_state = (new_c, new_h)
            return new_h, new_state